# Usuarios y títulos

In [1]:
import ast
import csv
import itertools
import os
import re
from collections import Counter
from collections import defaultdict
from datetime import datetime
from datetime import timedelta
from string import punctuation

import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from pandas.tslib import Timestamp

## Préstamos

In [2]:
%%time

merge_df = pd.read_csv('merge.csv', header=0)
merge_df = merge_df.fillna('')

CPU times: user 1.48 s, sys: 68.4 ms, total: 1.54 s
Wall time: 1.54 s


In [3]:
merge_df.shape

(516206, 12)

In [4]:
merge_df.head()

Unnamed: 0,TITULO,AUTOR,DEWEY,FECHA_PUB,PAIS_EJEMPLAR,COD_BARRAS_LECTOR,CODBAR,Sucursal,Tratamiento,Edad,Ciudad,Rango_edad
0,Johannes Gutenberg,"Borràs Perelló, Lluís.",925,2010,Spain,88007424,88007424,ARM,sr.,43,Armenia,40-49
1,Genealogía de una bruja,"Lacombe, Benjamin, 1982-",843,2009,Spain,88007424,88007424,ARM,sr.,43,Armenia,40-49
2,La independencia de Colombia : así fue,"Vasco, Irene, 1952-",986,2009,Colombia,88007424,88007424,ARM,sr.,43,Armenia,40-49
3,Maravillas del mundo,"Steele, Philip.",910,2007,Mexico,88007424,88007424,ARM,sr.,43,Armenia,40-49
4,No te gustaría ser un adivino maya,"Matthews, Rupert.",972,2008,Colombia,88007424,88007424,ARM,sr.,43,Armenia,40-49


## Thresholds

In [5]:
THRESHOLD_GROUPS = 100

In [6]:
THRESHOLD_DEWEY = 50

In [7]:
TOP_GROUP = 10

## Groups

In [8]:
group_count_sr = merge_df.groupby(['Ciudad', 'Tratamiento', 'Rango_edad']).CODBAR.count()

In [9]:
len(group_count_sr)

333

In [10]:
sorted_group_count_sr = group_count_sr.sort_values(ascending=False)

In [11]:
pd.DataFrame(sorted_group_count_sr)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,CODBAR
Ciudad,Tratamiento,Rango_edad,Unnamed: 3_level_1
Bogotá,sr.,18-29,46986
Bogotá,sra.,18-29,42716
Bogotá,sr.,30-39,31544
Tunja,sra.,-17,26232
Bogotá,sra.,30-39,22022
Tunja,sr.,-17,17622
Bogotá,sr.,50-64,16693
Bogotá,sr.,40-49,16089
Popayán,sra.,-17,12217
Popayán,sr.,-17,8630


In [12]:
filtered_group_count_sr = group_count_sr[lambda x: x >= THRESHOLD_GROUPS]

In [13]:
len(filtered_group_count_sr)

245

In [14]:
sorted_filtered_group_count_sr = filtered_group_count_sr.sort_values(ascending=False)

In [15]:
pd.DataFrame(sorted_filtered_group_count_sr.head())

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,CODBAR
Ciudad,Tratamiento,Rango_edad,Unnamed: 3_level_1
Bogotá,sr.,18-29,46986
Bogotá,sra.,18-29,42716
Bogotá,sr.,30-39,31544
Tunja,sra.,-17,26232
Bogotá,sra.,30-39,22022


## Dewey

In [16]:
dewey_count_sr = merge_df.groupby(['Ciudad', 'Tratamiento', 'Rango_edad', 'DEWEY']).CODBAR.count()

In [17]:
len(dewey_count_sr)

39915

In [18]:
sorted_dewey_count_sr = dewey_count_sr.sort_values(ascending=False)

In [19]:
pd.DataFrame(sorted_dewey_count_sr)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,CODBAR
Ciudad,Tratamiento,Rango_edad,DEWEY,Unnamed: 4_level_1
Tunja,sra.,-17,863,6719
Tunja,sr.,-17,863,3960
Popayán,sra.,-17,863,3061
Tunja,sra.,-17,813,2700
Tunja,sra.,-17,843,2626
Bogotá,sra.,18-29,863,2593
Tunja,sra.,-17,823,2279
Bogotá,sr.,18-29,863,2081
Popayán,sr.,-17,863,1996
Tunja,sr.,-17,813,1850


In [20]:
filtered_dewey_count_sr = dewey_count_sr[lambda x: x >= THRESHOLD_DEWEY]

In [21]:
len(filtered_dewey_count_sr)

1788

In [22]:
sorted_filtered_dewey_count_sr = filtered_dewey_count_sr.sort_values(ascending=False)

In [23]:
pd.DataFrame(sorted_filtered_dewey_count_sr.head())

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,CODBAR
Ciudad,Tratamiento,Rango_edad,DEWEY,Unnamed: 4_level_1
Tunja,sra.,-17,863,6719
Tunja,sr.,-17,863,3960
Popayán,sra.,-17,863,3061
Tunja,sra.,-17,813,2700
Tunja,sra.,-17,843,2626


## Count

### Global

In [24]:
%%time

ciudades = merge_df.Ciudad.unique()
tratamientos = merge_df.Tratamiento.unique()
rangos_edades = merge_df.Rango_edad.unique()

indexes = []
for ciudad in ciudades:
    for tratamiento in tratamientos:
        for rango_edad in rangos_edades:
            index = (ciudad, tratamiento, rango_edad)
            if index in filtered_group_count_sr and index in filtered_dewey_count_sr:
                sr = dewey_count_sr[index]
                sorted_sr = sr.sort_values(ascending=False).head(TOP_GROUP)
                for dewey in sorted_sr.index:
                    indexes.append(index + (dewey,))

CPU times: user 332 ms, sys: 0 ns, total: 332 ms
Wall time: 331 ms


In [25]:
selected_dewey_count_sr = dewey_count_sr[indexes]

In [26]:
len(selected_dewey_count_sr)

1520

In [27]:
selected_dewey_count_sr.head()

Ciudad   Tratamiento  Rango_edad  DEWEY
Armenia  sr.          -17         843      425
                                  863      365
                                  813      281
                                  823      237
                                  833      204
Name: CODBAR, dtype: int64

### Local

In [28]:
from IPython.display import display

In [29]:
pd.set_option('display.max_rows', 120)

def process_group_count(x):
    index = x.name[:2]
    count = group_count_sr[(ciudad, ) + index]
    return count

ciudades = selected_dewey_count_sr.index.levels[0]

for ciudad in ciudades:
    sr = selected_dewey_count_sr[ciudad]
    df = pd.DataFrame(sr)
    df['Cuenta_grupo'] = df.apply(process_group_count, axis=1)
    df = df.rename(columns={'CODBAR': 'Cuenta_dewey'})
    print(ciudad)
    display(df)

pd.reset_option('display.max_rows')

Armenia


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,-17,843,425,2793
sr.,-17,863,365,2793
sr.,-17,813,281,2793
sr.,-17,823,237,2793
sr.,-17,833,204,2793
sr.,-17,028,109,2793
sr.,-17,741,106,2793
sr.,-17,808,95,2793
sr.,-17,372,85,2793
sr.,-17,853,62,2793


Barranquilla


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,40-49,Videograbaciones en sala/pres.,179,643
sr.,40-49,863,47,643
sr.,40-49,813,34,643
sr.,40-49,658,33,643
sr.,40-49,613,18,643
sr.,40-49,823,16,643
sr.,40-49,741,13,643
sr.,40-49,808,10,643
sr.,40-49,843,10,643
sr.,40-49,153,9,643


Bogotá


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,40-49,Videograbaciones en sala/pres.,649,16089
sr.,40-49,863,608,16089
sr.,40-49,658,555,16089
sr.,40-49,Registro sonoro en sala/pres.,346,16089
sr.,40-49,813,341,16089
sr.,40-49,320,247,16089
sr.,40-49,301,236,16089
sr.,40-49,808,231,16089
sr.,40-49,616,225,16089
sr.,40-49,370,220,16089


Bucaramanga


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,40-49,Videograbaciones en sala/pres.,140,2057
sr.,40-49,Registro sonoro en sala/pres.,127,2057
sr.,40-49,863,109,2057
sr.,40-49,823,85,2057
sr.,40-49,371,63,2057
sr.,40-49,813,58,2057
sr.,40-49,741,58,2057
sr.,40-49,370,44,2057
sr.,40-49,920,41,2057
sr.,40-49,780,38,2057


Buenaventura


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sra.,40-49,Videograbaciones en sala/pres.,86,662
sra.,40-49,863,54,662
sra.,40-49,658,30,662
sra.,40-49,Registro sonoro en sala/pres.,26,662
sra.,40-49,371,23,662
sra.,40-49,641,15,662
sra.,40-49,808,14,662
sra.,40-49,370,13,662
sra.,40-49,372,12,662
sra.,40-49,869,12,662


Cali


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,50-64,Registro sonoro en sala/pres.,76,752
sr.,50-64,863,75,752
sr.,50-64,741,66,752
sr.,50-64,861,33,752
sr.,50-64,342,28,752
sr.,50-64,813,26,752
sr.,50-64,823,25,752
sr.,50-64,843,24,752
sr.,50-64,340,20,752
sr.,50-64,615,15,752


Cartagena


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,40-49,342,63,921
sr.,40-49,331,57,921
sr.,40-49,863,48,921
sr.,40-49,347,45,921
sr.,40-49,340,44,921
sr.,40-49,345,32,921
sr.,40-49,344,27,921
sr.,40-49,Videograbaciones en sala/pres.,25,921
sr.,40-49,343,21,921
sr.,40-49,346,19,921


Cúcuta


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,40-49,657,61,176
sr.,40-49,005,45,176
sr.,40-49,Registro sonoro en sala/pres.,30,176
sr.,40-49,Videograbaciones en sala/pres.,7,176
sr.,40-49,Videograbación I. en sala/pres,5,176
sr.,40-49,823,4,176
sr.,40-49,796,4,176
sr.,40-49,428,4,176
sr.,40-49,336,3,176
sr.,40-49,347,2,176


Florencia


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,-17,863,426,2394
sr.,-17,813,197,2394
sr.,-17,843,192,2394
sr.,-17,823,180,2394
sr.,-17,028,139,2394
sr.,-17,808,115,2394
sr.,-17,372,105,2394
sr.,-17,833,102,2394
sr.,-17,Videograbaciones en sala/pres.,94,2394
sr.,-17,861,54,2394


Girardot


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,-17,863,216,1454
sr.,-17,813,121,1454
sr.,-17,823,111,1454
sr.,-17,843,106,1454
sr.,-17,Videograbaciones en sala/pres.,86,1454
sr.,-17,833,56,1454
sr.,-17,808,48,1454
sr.,-17,372,47,1454
sr.,-17,741,46,1454
sr.,-17,028,43,1454


Honda


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,-17,863,236,1394
sr.,-17,843,120,1394
sr.,-17,Videograbaciones en sala/pres.,104,1394
sr.,-17,823,90,1394
sr.,-17,028,86,1394
sr.,-17,813,73,1394
sr.,-17,833,66,1394
sr.,-17,808,55,1394
sr.,-17,372,43,1394
sr.,-17,741,37,1394


Ibagué


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,40-49,863,62,466
sr.,40-49,813,27,466
sr.,40-49,301,26,466
sr.,40-49,868,18,466
sr.,40-49,302,16,466
sr.,40-49,070,15,466
sr.,40-49,986,12,466
sr.,40-49,323,12,466
sr.,40-49,303,11,466
sr.,40-49,923,11,466


Ipiales


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,-17,863,782,3834
sr.,-17,843,531,3834
sr.,-17,813,232,3834
sr.,-17,823,185,3834
sr.,-17,808,156,3834
sr.,-17,741,141,3834
sr.,-17,833,97,3834
sr.,-17,Videograbaciones en sala/pres.,93,3834
sr.,-17,028,77,3834
sr.,-17,861,73,3834


Leticia


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,40-49,Videograbaciones en sala/pres.,112,177
sr.,40-49,986,9,177
sr.,40-49,863,6,177
sr.,40-49,343,4,177
sr.,40-49,347,4,177
sr.,40-49,813,3,177
sr.,40-49,833,2,177
sr.,40-49,345,2,177
sr.,40-49,370,2,177
sr.,40-49,819,2,177


Manizales


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,40-49,Videograbaciones en sala/pres.,84,616
sr.,40-49,813,43,616
sr.,40-49,823,41,616
sr.,40-49,863,28,616
sr.,40-49,986,24,616
sr.,40-49,741,18,616
sr.,40-49,882,18,616
sr.,40-49,843,17,616
sr.,40-49,787,17,616
sr.,40-49,780,15,616


Medellín


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,40-49,863,117,1026
sr.,40-49,782,55,1026
sr.,40-49,927,51,1026
sr.,40-49,920,33,1026
sr.,40-49,860,33,1026
sr.,40-49,868,27,1026
sr.,40-49,833,25,1026
sr.,40-49,813,23,1026
sr.,40-49,843,21,1026
sr.,40-49,864,20,1026


Montería


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1


Neiva


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,40-49,347,74,873
sr.,40-49,Videograbaciones en sala/pres.,52,873
sr.,40-49,863,50,873
sr.,40-49,861,35,873
sr.,40-49,343,23,873
sr.,40-49,813,22,873
sr.,40-49,330,21,873
sr.,40-49,342,20,873
sr.,40-49,823,17,873
sr.,40-49,070,17,873


Pasto


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,40-49,Videograbaciones en sala/pres.,181,2601
sr.,40-49,863,111,2601
sr.,40-49,823,72,2601
sr.,40-49,347,65,2601
sr.,40-49,342,62,2601
sr.,40-49,Registro sonoro en sala/pres.,60,2601
sr.,40-49,813,56,2601
sr.,40-49,658,54,2601
sr.,40-49,150,46,2601
sr.,40-49,371,44,2601


Pereira


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,40-49,863,137,1158
sr.,40-49,813,57,1158
sr.,40-49,142,48,1158
sr.,40-49,843,39,1158
sr.,40-49,891,34,1158
sr.,40-49,Videograbaciones en sala/pres.,26,1158
sr.,40-49,823,25,1158
sr.,40-49,920,25,1158
sr.,40-49,371,22,1158
sr.,40-49,796,21,1158


Popayán


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,-17,863,1996,8630
sr.,-17,843,797,8630
sr.,-17,823,610,8630
sr.,-17,813,609,8630
sr.,-17,833,441,8630
sr.,-17,372,371,8630
sr.,-17,028,316,8630
sr.,-17,808,256,8630
sr.,-17,861,195,8630
sr.,-17,741,192,8630


Quibdó


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sra.,-17,863,79,319
sra.,-17,813,16,319
sra.,-17,823,15,319
sra.,-17,853,12,319
sra.,-17,843,10,319
sra.,-17,869,9,319
sra.,-17,808,8,319
sra.,-17,741,6,319
sra.,-17,895,6,319
sra.,-17,861,6,319


Riohacha


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1


San Andrés


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,-17,843,172,1374
sr.,-17,Videograbaciones en sala/pres.,157,1374
sr.,-17,863,142,1374
sr.,-17,813,125,1374
sr.,-17,808,90,1374
sr.,-17,823,85,1374
sr.,-17,741,79,1374
sr.,-17,833,59,1374
sr.,-17,372,30,1374
sr.,-17,891,24,1374


Santa Marta


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,30-39,863,73,436
sr.,30-39,813,61,436
sr.,30-39,347,37,436
sr.,30-39,621,22,436
sr.,30-39,895,19,436
sr.,30-39,823,18,436
sr.,30-39,891,16,436
sr.,30-39,843,15,436
sr.,30-39,340,15,436
sr.,30-39,346,15,436


Sincelejo


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,-17,863,174,1158
sr.,-17,741,103,1158
sr.,-17,823,90,1158
sr.,-17,843,73,1158
sr.,-17,028,67,1158
sr.,-17,813,66,1158
sr.,-17,372,54,1158
sr.,-17,833,50,1158
sr.,-17,808,33,1158
sr.,-17,925,27,1158


Tunja


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,40-49,Videograbaciones en sala/pres.,135,1961
sr.,40-49,863,110,1961
sr.,40-49,621,76,1961
sr.,40-49,813,60,1961
sr.,40-49,986,50,1961
sr.,40-49,301,40,1961
sr.,40-49,330,39,1961
sr.,40-49,333,38,1961
sr.,40-49,370,36,1961
sr.,40-49,823,32,1961


Valledupar


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,-17,863,105,682
sr.,-17,813,96,682
sr.,-17,823,56,682
sr.,-17,843,47,682
sr.,-17,028,25,682
sr.,-17,861,21,682
sr.,-17,833,20,682
sr.,-17,523,17,682
sr.,-17,372,15,682
sr.,-17,839,12,682


Villavicencio


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cuenta_dewey,Cuenta_grupo
Tratamiento,Rango_edad,DEWEY,Unnamed: 3_level_1,Unnamed: 4_level_1
sr.,50-64,Registro sonoro en sala/pres.,135,198
sr.,50-64,Videograbaciones en sala/pres.,28,198
sr.,50-64,863,6,198
sr.,50-64,Videograbaciones No pres./cons,4,198
sr.,50-64,Registro sonoro I. en sala/pre,4,198
sr.,50-64,070,3,198
sr.,50-64,Videograbación I. en sala/pres,2,198
sr.,50-64,861,2,198
sr.,50-64,741,1,198
sr.,50-64,303,1,198


In [30]:
dewey_dict = {}
with open('dewey2.txt') as f:
    for line in f:
        chunks = line.strip().split()
        dewey = chunks[0]
        topic = ' '.join(chunks[1:])
        dewey_dict[dewey] = topic

In [31]:
def dewey_report(main_dict, sub_dict, cat_dict):
    dewey_code = Counter(main_dict).most_common(1)[0][0]
    dewey_category = dewey_dict[dewey_code]
    print('Top main category:', dewey_code, '-', dewey_category)
    dewey_code = Counter(sub_dict).most_common(1)[0][0]
    dewey_category = dewey_dict[dewey_code]
    print('Top sub category:', dewey_code, '-', dewey_category)
    dewey_code = Counter(cat_dict).most_common(1)[0][0]
    dewey_category = dewey_dict[dewey_code]
    print('Top category:', dewey_code, '-', dewey_category)
    
    for k1 in main_dict:
        print(k1, dewey_dict[k1], ':', main_dict[k1])
        for k2 in sub_dict:
            if k2[:1].startswith(k1[:1]):
                print('    ', k2, dewey_dict[k2], ':', sub_dict[k2])
                for k3 in cat_dict:
                    if k3[:2].startswith(k2[:2]):
                        print('         ', k3, '-', dewey_dict[k3], ':', cat_dict[k3])

In [32]:
ciudades = selected_dewey_count_sr.index.levels[0]

for ciudad in ciudades:
    sr = selected_dewey_count_sr[ciudad]
    for i, subsr in sr.groupby(level=['Tratamiento', 'Rango_edad']):
        main_dict = defaultdict(int)
        sub_dict = defaultdict(int)
        cat_dict = defaultdict(int)
        for j, x in subsr.iteritems():
            try:
                n = int(j[2])
                cat_dict[j[2]] += x
                sub_dict[j[2][:2] + '-'] += x
                main_dict[j[2][:1] + '--'] += x
            except:
                pass
        
        print((ciudad, ) + i)
        dewey_report(main_dict, sub_dict, cat_dict)
        print()
    print()

('Armenia', 'sr.', '-17')
Top main category: 8-- - Literature
Top sub category: 84- - French & related literatures
Top category: 843 - French fiction
7-- Arts & recreation : 106
     74- Graphic arts & decorative arts : 106
          741 - Drawing & drawings : 106
3-- Social sciences : 85
     37- Education : 85
          372 - Primary education (elementary education) : 85
8-- Literature : 1669
     80- Literature, rhetoric & criticism : 95
          808 - Rhetoric & collections of literary texts from more than two literatures : 95
     82- English & Old English literatures : 237
          823 - English fiction : 237
     81- American literature in English : 281
          813 - American fiction in English : 281
     85- Italian, Romanian, & related literatures : 62
          853 - Italian fiction : 62
     83- German & related literatures : 204
          833 - German fiction : 204
     84- French & related literatures : 425
          843 - French fiction : 425
     86- Spanish, Portugu