# In [None]:
import pandas as pd    

# from sorted_nicely import sorted_nicely


def get_list_of_index_names(df):
    """Return the names of the index levels of ``df`` as a list.

    A flat index yields a single-element list (``[None]`` when the index is
    unnamed); a MultiIndex yields one entry per level.
    """
    # ``Index.names`` is list-like for flat and multi-level indexes alike, so
    # wrapping it in ``list`` never splits a string name into characters.
    return list(df.index.names)
14 
 
15   
16      
def broadcast_simple(df_in, index):
    """Replicate ``df_in`` once per distinct value of ``index``.

    The distinct values of ``index`` become a new innermost index level, named
    after ``index.name``. The result is sorted by its index.

    If ``index`` is a MultiIndex, the function is applied once per level, so
    ``df_in`` is replicated for every combination of level values. Levels whose
    name is already an index level name of ``df_in`` are skipped (the match is
    BASED ON NAME ONLY). If every level is skipped, a plain copy of ``df_in``
    is returned.

    Parameters
    ----------
    df_in : pandas.DataFrame or pandas.Series
        Data to replicate.
    index : pandas.Index or pandas.MultiIndex
        Named index whose distinct values label the copies.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        A Series when the result has a single column, otherwise a DataFrame.
        Only the column axis is squeezed, so the index is always kept.

    Raises
    ------
    ValueError
        If a flat ``index`` has no name.

    EXAMPLES

    s=pd.DataFrame(["a","b","c"], index=pd.Index(["A", "B", "C"], name="letters"), columns=["value"])
    s

            value
    letters
    A           a
    B           b
    C           c

    #works
    my_index=pd.Index(["one", "two"], name="numbers")
    broadcast_simple(s, my_index)

    letters  numbers
    A        one        a
             two        a
    B        one        b
             two        b
    C        one        c
             two        c

    #multi index example
    my_index=pd.MultiIndex.from_product([["one", "two"], ["cat", "dog"]], names=["numbers", "pets"])
    broadcast_simple(s, my_index)

    letters  numbers  pets
    A        one      cat     a
                      dog     a
             two      cat     a
                      dog     a
    B        one      cat     b
                      dog     b
             two      cat     b
                      dog     b
    C        one      cat     c
                      dog     c
             two      cat     c
                      dog     c

    #Ignored level in multi index example ("letters" is already in s)
    my_index=pd.MultiIndex.from_product([["one", "two"], ["X", "Y"]], names=["numbers", "letters"])
    broadcast_simple(s, my_index)

    letters  numbers
    A        one        a
             two        a
    B        one        b
             two        b
    C        one        c
             two        c

    #Raises an error because the index should be named
    my_index=pd.Index(["one", "two"])
    broadcast_simple(s, my_index)
    """
    existing_names = list(df_in.index.names)

    # A MultiIndex is handled one level at a time; each recursive call appends
    # that level as the new innermost level.
    if isinstance(index, pd.MultiIndex):
        y = df_in.copy()
        for level_name in index.names:
            if level_name in existing_names:
                continue
            y = broadcast_simple(y, index.get_level_values(level_name))
        return y

    if index.name is None:
        raise ValueError("index should be named")

    cat_list = index.unique()

    # pd.concat puts the key level outermost ...
    y = pd.concat(
        [df_in] * len(cat_list),
        keys=cat_list,
        names=[index.name] + existing_names,
    )

    # ... so rotate it to the innermost position. Working by position rather
    # than through reset_index/set_index avoids a clash when a column (or the
    # Series name) equals index.name.
    new_last = list(range(1, y.index.nlevels)) + [0]
    y = y.reorder_levels(new_last).sort_index()

    # Collapse a single-column frame to a Series, but never squeeze the rows:
    # that would drop the index that was just built.
    if isinstance(y, pd.DataFrame):
        y = y.squeeze(axis=1)
    return y
97      
98 
 
def concat_categories(p, np, index):
    """Concatenate ``p`` and ``np`` under the labels in ``index``.

    Works like ``pd.concat`` with ``keys``, except that the new level is moved
    so it is innermost instead of outermost. The result is sorted by its index.
    http://pandas.pydata.org/pandas-docs/stable/merging.html#concatenating-objects

    Parameters
    ----------
    p, np : pandas.Series or pandas.DataFrame
        The two objects to stack; the first entry of ``index`` labels ``p`` and
        the second labels ``np``.
    index : pandas.Index
        Named index supplying the labels; its name becomes the name of the new
        level.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        A Series when the result has a single column, otherwise a DataFrame.
        Only the column axis is squeezed, so the index is always kept.

    Raises
    ------
    ValueError
        If ``index`` has no name.
    """
    if index.name is None:
        raise ValueError("index should be named")

    # pd.concat puts the key level outermost; the remaining levels are named
    # after the index levels of ``p``.
    y = pd.concat(
        [p, np],
        keys=index,
        names=[index.name] + list(p.index.names),
    )

    # Rotate the key level to the innermost position. Working by position
    # rather than through reset_index/set_index avoids a clash when a column
    # (or the Series name) equals index.name.
    new_last = list(range(1, y.index.nlevels)) + [0]
    y = y.reorder_levels(new_last).sort_index()

    # Return a Series when there is a single column, without ever squeezing
    # away the row index.
    if isinstance(y, pd.DataFrame):
        y = y.squeeze(axis=1)
    return y
