In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import networkx as nx

from pathlib import Path

In [2]:
path = Path('../../../data/spanish-highschool')
specific_schools = ["t11_10", "t11_9", "t11_8", "t11_7", "t11_6", "t11_5", "t11_4", "t11_3", "t11_2", "t11_1", "t1", "t2", "t6"]

dfs = []
for school in specific_schools:
    df = pd.read_csv(path.joinpath("Edges_" + school + ".csv"), sep=',', header=0)
    if len(df.columns) == 4:
        df = df.drop(columns=['Unnamed: 0'])
    df.columns = ['source', 'target', 'weight']
    df["sign"] = np.sign(df.weight)
    print(df.head(1))
    dfs.append(df)

# df = pd.read_csv(path.joinpath('soc-wiki-elec.edges'), sep=' ', header=None, skiprows=1)

# # name columns
# df.columns = ['source', 'target', 'sign', 'date']

# df.head()

   source  target  weight  sign
0    5143    5232       1     1
   source  target  weight  sign
0    3177    3191       1     1
   source  target  weight  sign
0    2112    2121       1     1
   source  target  weight  sign
0    3043    3047       1     1
   source  target  weight  sign
0    2506    2513       2     1
   source  target  weight  sign
0    2039    2058       1     1
   source  target  weight  sign
0    4923    4929       1     1
   source  target  weight  sign
0    4825    4824       2     1
   source  target  weight  sign
0    4119    4204       1     1
   source  target  weight  sign
0    3501    3505       1     1
   source  target  weight  sign
0     191     198      -2    -1
   source  target  weight  sign
0     703     692       2     1
   source  target  weight  sign
0    1091    1090       1     1


In [3]:
# calculate triad census for each school and store it in a dataframe
triad_census = pd.DataFrame()
for i, df in enumerate(dfs):
    G = nx.from_pandas_edgelist(df, 'source', 'target', 'sign', create_using=nx.DiGraph())
    triads = nx.triads.triadic_census(G)
    triad_census = pd.concat([triad_census, pd.DataFrame(triads, index=[specific_schools[i]])])
    # append(pd.DataFrame(triads, index=[specific_schools[i]]))

In [4]:
triad_census

Unnamed: 0,003,012,102,021D,021U,021C,111D,111U,030T,030C,201,120D,120U,120C,210,300
t11_10,13405789,1882629,471127,43879,20332,22289,9445,22641,11057,383,3407,3290,4822,1770,3225,1171
t11_9,20181500,1944662,395291,43632,11639,14158,4793,18570,6753,131,2066,1445,4698,849,1795,723
t11_8,8003827,741280,146330,15807,3157,5295,1674,6378,2256,49,532,362,2113,302,713,301
t11_7,98310,55311,15244,5080,1691,3356,1381,3139,1260,37,350,450,963,186,479,223
t11_6,280939,140415,31075,12280,5339,8674,3128,7499,2811,137,1220,523,1925,484,883,308
t11_5,1769457,413904,215675,9615,3570,5715,4058,10067,4077,128,2086,2241,3656,1152,3510,2069
t11_4,166625,66400,41857,1420,1642,860,833,2255,1126,63,597,494,1243,564,1369,632
t11_3,708076,328624,98623,24468,6148,9475,4734,12983,8934,275,1869,3156,4219,1481,2725,1075
t11_2,51190737,4768357,1161737,94539,37328,41469,19359,48144,17613,698,6962,4811,9287,3371,5597,1891
t11_1,2977149,763382,203609,32169,15646,18102,8062,21759,7890,301,3493,1860,5492,1566,3016,1289


In [5]:
df_tcen_path = Path(path.joinpath('triad_census_schools.h5'))

try:
    df_tcen = pd.read_hdf(df_tcen_path)
except FileNotFoundError:
    print("File not found.")

df_tcen

ValueError: key must be provided when HDF5 file contains multiple datasets.

In [11]:
# save to hdf
triad_census.to_hdf(df_tcen_path, key='spanish-highschool')

triad_census.to_hdf(df_tcen_path.name, key='spanish-highschool')

  check_attribute_name(name)


In [6]:
df_tcen2 = pd.read_hdf(df_tcen_path)
df_tcen2

ValueError: key must be provided when HDF5 file contains multiple datasets.

# Analysis of classes

### Extract separate classes

In [7]:
# load dataframes from csv

dfs_nodes = []
schools_with_more_than_one_class = []
for school in specific_schools:
    df = pd.read_csv(path.joinpath("Nodes_" + school + ".csv"), sep=',', header=0)
    if df.columns[0] == 'Unnamed: 0':
        continue
    if len(df.columns) < 7:
        # schools t1, t6, t2 are not separate class, so they are not interesting
        # print(df.head(1))
        continue
    if len(np.unique(df['Curso'])) == 1:
        # only one class in the school
        # print(df.head(1))
        continue
    print(df.head(1))
    dfs_nodes.append(df)
    schools_with_more_than_one_class.append(school)
len(dfs_nodes)

     ID  Colegio  Curso Grupo  Sexo  prosocial  crttotal
0  5143       10      1     A  Male       0.67         2
     ID  Colegio  Curso Grupo    Sexo  prosocial  crttotal
0  3177        9      1     A  Female       0.67         2
     ID  Colegio  Curso Grupo    Sexo  prosocial  crttotal
0  2112        8      1     A  Female       0.67         1
     ID  Colegio  Curso Grupo  Sexo  prosocial  crttotal
0  2039        5      1     A  Male       0.67         1
     ID  Colegio  Curso Grupo    Sexo  prosocial  crttotal
0  4923        4      1     A  Female       0.33         0
     ID  Colegio  Curso Grupo    Sexo  prosocial  crttotal
0  4825        3      1     A  Female        1.0         2
     ID  Colegio  Curso Grupo  Sexo  prosocial  crttotal
0  4119        2      1     A  Male       0.33         1
     ID  Colegio  Curso Grupo  Sexo  prosocial  crttotal
0  3501        1      1     A  Male       0.67         1


8

In [8]:
len(dfs_nodes)

8

In [13]:
# split dataframes into separate classes by column Curso and store data in list of lists
dfs_nodes_classes = []
for df in dfs_nodes:
    dfs_nodes_classes.append([df[df['Curso'] == i] for i in np.unique(df['Curso'])])

In [24]:
# create separate networks from edges based on classes and store them in a list
# ids of nodes in edges and classes match, so we can use the same ids to extract which edges should go to which network

dfs_edges_classes = []

for i, dfs_nodes_class in enumerate(dfs_nodes_classes):
    dfs_edges_classes.append([])

    edges_school_id = np.where(np.array(specific_schools) == schools_with_more_than_one_class[i])[0][0]
    for df_nodes_class in dfs_nodes_class:
        # get ids of nodes in the class
        ids = df_nodes_class['ID']
        
        # get edges that have both nodes in the class
        edges = dfs[edges_school_id][(dfs[edges_school_id]['source'].isin(ids)) & (dfs[edges_school_id]['target'].isin(ids))]

        dfs_edges_classes[-1].append(edges)

In [32]:
dfs_edges_classes[0]

# get lengths of all classes
lengths = [len(dfs_edges_class) for dfs_edges_class in dfs_edges_classes]
print(lengths)

# print sizes of all classes
sizes = [df.shape[0] for df in dfs_edges_classes[0]]
sizes

[4, 4, 4, 4, 4, 2, 4, 2]


[1605, 1919, 1670, 1435]

In [33]:
# save edges dataframes to csv

for i, dfs_edges_class in enumerate(dfs_edges_classes):
    for j, df in enumerate(dfs_edges_class):
        class_nr = np.unique(dfs_nodes_classes[i][j]['Curso'])[0]
        df.to_csv(path.joinpath('Edges_' + schools_with_more_than_one_class[i] + '_class_' + str(class_nr) + '.csv'))
        # df.to_csv(path.joinpath('Edges_' + schools_with_more_than_one_class[i] + '_class_' + str(class_nr) + '.h5'), key='spanish-highschool')

In [35]:
# calculate triad census for each class and store it in a dataframe

triad_census_classes = pd.DataFrame()

for i, dfs_edges_class in enumerate(dfs_edges_classes):
    triad_census_class = pd.DataFrame()

    school = schools_with_more_than_one_class[i]
    for j, df in enumerate(dfs_edges_class):
        class_nr = np.unique(dfs_nodes_classes[i][j]['Curso'])[0]

        G = nx.from_pandas_edgelist(df, 'source', 'target', 'sign', create_using=nx.DiGraph())
        triads = nx.triads.triadic_census(G)
        triad_census_class = pd.concat([triad_census_class, pd.DataFrame(triads, index=[school + '_' + str(class_nr)])])
    triad_census_classes = pd.concat([triad_census_classes, triad_census_class])

triad_census_classes

Unnamed: 0,003,012,102,021D,021U,021C,111D,111U,030T,030C,201,120D,120U,120C,210,300
t11_10_1,54642,40615,15262,4975,2054,3774,2254,4508,1540,69,1155,565,922,458,960,291
t11_10_2,57229,44990,15988,8889,2983,5111,2358,6013,3045,101,971,1036,1402,502,1035,443
t11_10_3,66464,48362,13628,7877,3457,4424,2943,3353,2514,85,767,1103,598,414,642,218
t11_10_4,63984,39902,12418,5802,2191,3863,1890,3767,1486,128,514,586,681,396,588,219
t11_9_1,18355,13580,6270,1681,776,1471,1012,1990,547,23,485,311,464,215,472,253
t11_9_2,62382,37391,16433,3682,1510,2597,1811,3756,953,59,878,535,731,328,744,254
t11_9_3,33107,17822,7083,3133,672,1303,871,1544,666,25,296,281,240,144,235,103
t11_9_4,44549,26382,9680,2687,1162,1698,1099,1988,804,24,407,318,464,162,344,113
t11_8_1,6613,7340,2916,1107,398,647,466,1020,402,13,161,116,410,103,267,121
t11_8_2,23672,13192,6059,727,560,910,729,934,202,18,177,110,212,80,217,106


There is a problem with number of triads. When we split schools in the classes, for some of the triads we get smaller number. 
As if some of the links were lost. 

In [41]:
# print sizes of all schools
print([df.shape[0] for df in dfs])

# print sizes of all classes 
for dfs_edges_class in dfs_edges_classes:
    print(sum([df.shape[0] for df in dfs_edges_class]))
# sizes = [df.shape[0] for df in dfs_edges_classes[0]]
# np.sum(sizes)

# the number of edges is smaller. Why?

[7613, 6379, 3267, 1426, 2403, 4498, 1793, 4176, 11960, 5535, 8557, 3755, 12812]
6629
4195
2014
4118
1587
3320
9544
4496


In [51]:
# get number of students in each class based on nodes data
students = []
for dfs_nodes_class in dfs_nodes_classes:
    students.append([df.shape[0] for df in dfs_nodes_class])
print(students)

# get number of students in each class based on edges data
students_edges = []
for dfs_edges_class in dfs_edges_classes:
    for df in dfs_edges_class:
        ids = np.unique(np.concatenate([np.unique(df['source']), np.unique(df['target'])]))
        students_edges.append(len(ids))
print(students_edges)

[[94, 98, 99, 95], [67, 94, 75, 83], [52, 67, 47, 43], [52, 59, 56, 56], [29, 27, 28, 26], [73, 83], [114, 155, 118, 125], [107, 125]]
[94, 98, 99, 95, 67, 94, 75, 83, 52, 67, 47, 42, 52, 59, 56, 56, 29, 27, 28, 26, 73, 83, 114, 155, 118, 125, 107, 125]


In [None]:
# find which ids are missing in the edges data
missing_ids = []
for i, dfs_edges_class in enumerate(dfs_edges_classes):
    for j, df in enumerate(dfs_edges_class):
        ids = np.unique(np.concatenate([np.unique(df['source']), np.unique(df['target'])]))
        missing_ids.append(np.setdiff1d(dfs_nodes_classes[i][j]['ID'], ids))

missing_ids

[array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([2434]),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64)]

I did investigation. The reason of the difference is (most likely) because some data of students are not in Nodes data. 
I checked one of such students, and he/she did not have any outgoing links be there were some incoming links. 
It is ok to remove such a student. 

I will still confirm it

In [60]:
# compare student school ids from dfs and dfs_edges

for i, dfs_nodes_class in enumerate(dfs_nodes_classes):
    ids_classes = np.array([])
    for j, df in enumerate(dfs_nodes_class):
        ids_classes = np.concatenate([ids_classes, np.unique(dfs_edges_classes[i][j]['source']), np.unique(dfs_edges_classes[i][j]['target'])])
    
    school = schools_with_more_than_one_class[i]
    school_id = np.where(np.array(specific_schools) == school)[0][0]
    ids_school = np.unique(np.concatenate([np.unique(dfs[school_id]['source']), np.unique(dfs[school_id]['target'])]))

    print(np.setdiff1d(ids_school, ids_classes))

[5142 5144 5157 5166 5182 5183 5186 5197 5203 5205 5212 5219 5226 5227
 5228 5230 5244 5247 5250 5251 5252 5255 5256 5261 5268 5276 5279 5294
 5299 5300 5312 5319 5345 5350 5351 5365 5369 5371 5373 5375 5384 5389
 5398 5400 5408 5420 5421 5422 5424 5425 5427 5436 5439 5447 5461 5495
 5497 5507 5508 5510 5516 5522 5525 5527 5528 5547 5558 5564 5565 5568
 5569 5580]
[3176 3178 3180 3182 3183 3184 3185 3186 3187 3190 3192 3198 3199 3200
 3201 3202 3203 3204 3205 3207 3208 3209 3211 3215 3218 3219 3220 3222
 3224 3229 3233 3236 3237 3238 3239 3240 3241 3243 3245 3246 3247 3248
 3251 3253 3257 3258 3259 3261 3262 3263 3264 3269 3277 3290 3293 3298
 3299 5600 5602 5605 5608 5614 5617 5618 5619 5621 5622 5625 5628 5629
 5632 5633 5634 5635 5638 5639 5641 5643 5647 5650 5654 5656 5657 5658
 5659 5662 5664 5665 5669 5670 5672 5675 5676 5677 5678 5680 5681 5682
 5684 5690 5692 5696 5700 5702 5705 5707 5708 5710 5712 5715 5720 5725
 5728 5729 5730 5742 5747 5751 5752 5755 5756 5758 5759 5760 5761

It seems that above hypothesis makes sense. A chosen student not in classes appeared only in outgoing links in the dataset. 

Gonna confirm it further. 

In [62]:
# compare student school ids from dfs and dfs_edges

for i, dfs_nodes_class in enumerate(dfs_nodes_classes):
    ids_classes = np.array([])
    for j, df in enumerate(dfs_nodes_class):
        ids_classes = np.concatenate([ids_classes, np.unique(dfs_edges_classes[i][j]['source']), np.unique(dfs_edges_classes[i][j]['target'])])
    
    school = schools_with_more_than_one_class[i]
    school_id = np.where(np.array(specific_schools) == school)[0][0]
    ids_school = np.unique(np.concatenate([np.unique(dfs[school_id]['source'])]))

    print(np.setdiff1d(ids_school, ids_classes))

[]
[]
[2434]
[]
[]
[]
[]
[]


Yes, this is confirmed. 

By looking at list of students having outgoing links, they match. 

So, it means we have to repeat triad census for other schools by looking at students lists from Nodes_*.csv. 

In [63]:
# load dataframes from csv

dfs_nodes = []
schools_with_more_than_one_class = []
for school in specific_schools:
    df = pd.read_csv(path.joinpath("Nodes_" + school + ".csv"), sep=',', header=0)
    if df.columns[0] == 'Unnamed: 0':
        # remove first column if it is unnamed
        df = df.drop(columns=['Unnamed: 0'])
    print(df.head(1))
    dfs_nodes.append(df)
    schools_with_more_than_one_class.append(school)
len(dfs_nodes)

     ID  Colegio  Curso Grupo  Sexo  prosocial  crttotal
0  5143       10      1     A  Male       0.67         2
     ID  Colegio  Curso Grupo    Sexo  prosocial  crttotal
0  3177        9      1     A  Female       0.67         2
     ID  Colegio  Curso Grupo    Sexo  prosocial  crttotal
0  2112        8      1     A  Female       0.67         1
     ID  Colegio  Curso Grupo    Sexo  prosocial  crttotal
0  3043        7      1     A  Female       0.67         2
     ID  Colegio  Curso Grupo    Sexo  prosocial  crttotal
0  2506        6      1     A  Female        1.0         2
     ID  Colegio  Curso Grupo  Sexo  prosocial  crttotal
0  2039        5      1     A  Male       0.67         1
     ID  Colegio  Curso Grupo    Sexo  prosocial  crttotal
0  4923        4      1     A  Female       0.33         0
     ID  Colegio  Curso Grupo    Sexo  prosocial  crttotal
0  4825        3      1     A  Female        1.0         2
     ID  Colegio  Curso Grupo  Sexo  prosocial  crttotal
0  4119

13

In [64]:
# for chosen schools split dataframes into separate classes by column Curso and store data in list of lists
dfs_nodes_classes = []
for df in dfs_nodes:
    if len(df.columns) < 7:
        # schools t1, t6, t2 are not separate class, so they are not interesting
        dfs_nodes_classes.append([df])
    else:
        dfs_nodes_classes.append([df[df['Curso'] == i] for i in np.unique(df['Curso'])])

In [65]:
# create separate networks from edges based on classes and store them in a list
# ids of nodes in edges and classes match, so we can use the same ids to extract which edges should go to which network

dfs_edges_classes = []

for i, dfs_nodes_class in enumerate(dfs_nodes_classes):
    dfs_edges_classes.append([])

    edges_school_id = np.where(np.array(specific_schools) == schools_with_more_than_one_class[i])[0][0]
    for df_nodes_class in dfs_nodes_class:
        # get ids of nodes in the class
        ids = df_nodes_class['ID']
        
        # get edges that have both nodes in the class
        edges = dfs[edges_school_id][(dfs[edges_school_id]['source'].isin(ids)) & (dfs[edges_school_id]['target'].isin(ids))]

        dfs_edges_classes[-1].append(edges)

In [66]:
dfs_edges_classes[0]

# get lengths of all classes
lengths = [len(dfs_edges_class) for dfs_edges_class in dfs_edges_classes]
print(lengths)

# print sizes of all classes
sizes = [df.shape[0] for df in dfs_edges_classes[0]]
sizes

[4, 4, 4, 1, 1, 4, 4, 2, 4, 2, 1, 1, 1]


[1605, 1919, 1670, 1435]

In [67]:
# save edges dataframes to csv

for i, dfs_edges_class in enumerate(dfs_edges_classes):
    for j, df in enumerate(dfs_edges_class):
        if len(dfs_nodes_classes[i][j].columns) < 7:
            # schools t1, t6, t2 are not separate class, so they shoud be saved differently
            df.to_csv(path.joinpath('Edges_' + schools_with_more_than_one_class[i] + '_multiple.csv'))
        else:
            class_nr = np.unique(dfs_nodes_classes[i][j]['Curso'])[0]
            df.to_csv(path.joinpath('Edges_' + schools_with_more_than_one_class[i] + '_class_' + str(class_nr) + '.csv'))

In [68]:
# calculate triad census for each class and store it in a dataframe

triad_census_classes = pd.DataFrame()

for i, dfs_edges_class in enumerate(dfs_edges_classes):
    triad_census_class = pd.DataFrame()

    school = schools_with_more_than_one_class[i]
    for j, df in enumerate(dfs_edges_class):
        if len(dfs_nodes_classes[i][j].columns) < 7:
            # schools t1, t6, t2 are not separate class, so they shoud be saved differently
            G = nx.from_pandas_edgelist(df, 'source', 'target', 'sign', create_using=nx.DiGraph())
            triads = nx.triads.triadic_census(G)
            triad_census_class = pd.concat([triad_census_class, pd.DataFrame(triads, index=[school])])
        else:
            class_nr = np.unique(dfs_nodes_classes[i][j]['Curso'])[0]

            G = nx.from_pandas_edgelist(df, 'source', 'target', 'sign', create_using=nx.DiGraph())
            triads = nx.triads.triadic_census(G)
            triad_census_class = pd.concat([triad_census_class, pd.DataFrame(triads, index=[school + '_' + str(class_nr)])])
    triad_census_classes = pd.concat([triad_census_classes, triad_census_class])

triad_census_classes

Unnamed: 0,003,012,102,021D,021U,021C,111D,111U,030T,030C,201,120D,120U,120C,210,300
t11_10_1,54642,40615,15262,4975,2054,3774,2254,4508,1540,69,1155,565,922,458,960,291
t11_10_2,57229,44990,15988,8889,2983,5111,2358,6013,3045,101,971,1036,1402,502,1035,443
t11_10_3,66464,48362,13628,7877,3457,4424,2943,3353,2514,85,767,1103,598,414,642,218
t11_10_4,63984,39902,12418,5802,2191,3863,1890,3767,1486,128,514,586,681,396,588,219
t11_9_1,18355,13580,6270,1681,776,1471,1012,1990,547,23,485,311,464,215,472,253
t11_9_2,62382,37391,16433,3682,1510,2597,1811,3756,953,59,878,535,731,328,744,254
t11_9_3,33107,17822,7083,3133,672,1303,871,1544,666,25,296,281,240,144,235,103
t11_9_4,44549,26382,9680,2687,1162,1698,1099,1988,804,24,407,318,464,162,344,113
t11_8_1,6613,7340,2916,1107,398,647,466,1020,402,13,161,116,410,103,267,121
t11_8_2,23672,13192,6059,727,560,910,729,934,202,18,177,110,212,80,217,106


In [69]:
# calculate triad census also for whole schools but after removing students not in Nodes data

triad_census_schools = pd.DataFrame()

for i, df in enumerate(dfs):
    triad_census_school = pd.DataFrame()

    school = specific_schools[i]
    ids = np.unique(np.concatenate([np.unique(df['source']), np.unique(df['target'])]))
    nodes = dfs_nodes[i]
    ids_nodes = nodes['ID']
    ids = np.intersect1d(ids, ids_nodes)
    df = df[(df['source'].isin(ids)) & (df['target'].isin(ids))]

    G = nx.from_pandas_edgelist(df, 'source', 'target', 'sign', create_using=nx.DiGraph())
    triads = nx.triads.triadic_census(G)
    triad_census_school = pd.concat([triad_census_school, pd.DataFrame(triads, index=[school])])

    triad_census_schools = pd.concat([triad_census_schools, triad_census_school])
triad_census_schools

Unnamed: 0,003,012,102,021D,021U,021C,111D,111U,030T,030C,201,120D,120U,120C,210,300
t11_10,7738267,1271171,393682,27543,10685,17172,9445,17641,8585,383,3407,3290,3603,1770,3225,1171
t11_9,4458093,612127,240978,11183,4120,7069,4793,9278,2970,131,2066,1445,1899,849,1795,723
t11_8,1204469,180673,78617,2569,1331,2177,1674,2802,873,49,532,362,812,302,713,301
t11_7,33426,25777,10944,2170,1097,2247,1381,2101,741,37,350,450,551,186,479,223
t11_6,89012,56624,21436,4142,2828,5278,3128,4536,1426,137,1220,523,955,484,883,308
t11_5,1278122,308642,194574,6259,1964,4767,4058,8256,3041,128,2086,2241,2602,1152,3510,2069
t11_4,122428,46214,38144,680,291,569,833,1570,637,63,597,494,735,564,1369,632
t11_3,320776,171080,76828,10938,3417,6119,4734,8887,4957,275,1869,3156,2303,1481,2725,1075
t11_2,18844527,2371828,832494,47819,18629,28808,19359,33908,12002,698,6962,4811,6016,3371,5597,1891
t11_1,1395477,418298,158771,16885,8012,12910,8062,15759,5254,301,3493,1860,3407,1566,3016,1289


In [70]:
# save to hdf

df_tcen_path = Path(path.joinpath('triad_census_schools.h5'))

triad_census_classes.to_hdf(df_tcen_path, key='spanish-highschool_classes')
triad_census_schools.to_hdf(df_tcen_path, key='spanish-highschool_schools')


triad_census_classes.to_hdf(df_tcen_path.name, key='spanish-highschool_classes')
triad_census_schools.to_hdf(df_tcen_path.name, key='spanish-highschool_schools')

  check_attribute_name(name)
  check_attribute_name(name)
