In [75]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pulp

## Reading in the datasets saved with the latest forecast model

In [3]:
#forecast month
fm = pd.read_csv('test.csv')
pd.DataFrame.head(fm)

Unnamed: 0.1,Unnamed: 0,store_nbr,class,year,date,month_year,city,V1,family,sum_unit_sales.x,...,tran_on_wknd,base_prediction,base_residuals,dow,RT4_predictions,MR_predictions,base_residuals_tran,base_residuals_log,RT4_residuals,perc_on_prom
0,1,1,1002,2017,2017-07-01,201707,Quito,12244072,GROCERY I,31.0,...,0.172886,2.308426,0.509756,Saturday,3.375603,,865.509756,6.763319,-0.557421,0.0
1,2,1,1002,2017,2017-07-02,201707,Quito,12256731,GROCERY I,16.0,...,0.172886,2.308426,-0.530648,Sunday,3.375603,,864.469352,6.762116,-1.597825,0.0
2,3,1,1002,2017,2017-07-03,201707,Quito,12269201,GROCERY I,16.0,...,0.172886,2.308426,-0.530648,Monday,1.375843,,864.469352,6.762116,0.244965,0.0
3,4,1,1002,2017,2017-07-04,201707,Quito,12281198,GROCERY I,18.0,...,0.172886,2.308426,-0.508426,Tuesday,1.375843,,864.491574,6.762142,0.267187,0.0
4,5,1,1002,2017,2017-07-05,201707,Quito,12293172,GROCERY I,27.0,...,0.172886,2.308426,0.391574,Wednesday,1.375843,,865.391574,6.763182,1.167187,0.2


In [4]:
#last available data month
cm = pd.read_csv('last_month.csv')
pd.DataFrame.head(cm)

Unnamed: 0.1,Unnamed: 0,store_nbr,year,date,month_year,city,V1,class,family,sum_unit_sales.x,...,dcoilwtico.y,sum_unit_sales.y,day_of_month,wage_factor,month,no_items.y,transactions.y,is_weekday,tran_on_wknd,dow
0,1,1,2017,2017-05-01,201705,Quito,11512880,1002,GROCERY I,19.0,...,48.574355,26911839.25,1,0.857143,5,2295,339992,0,0.172886,Monday
1,2,1,2017,2017-05-02,201705,Quito,11525435,1002,GROCERY I,39.0,...,48.574355,26911839.25,2,0.714286,5,2295,339992,0,0.172886,Tuesday
2,3,1,2017,2017-05-03,201705,Quito,11537442,1002,GROCERY I,41.0,...,48.574355,26911839.25,3,0.571429,5,2295,339992,0,0.172886,Wednesday
3,4,1,2017,2017-05-04,201705,Quito,11549415,1002,GROCERY I,27.0,...,48.574355,26911839.25,4,0.428571,5,2295,339992,0,0.172886,Thursday
4,5,1,2017,2017-05-05,201705,Quito,11561265,1002,GROCERY I,21.0,...,48.574355,26911839.25,5,0.285714,5,2295,339992,0,0.172886,Friday


In [11]:
#items
items = pd.read_csv('items.csv')
pd.DataFrame.head(items)

Unnamed: 0,item_nbr,family,class,perishable
0,96995,GROCERY I,1093,0
1,99197,GROCERY I,1067,0
2,103501,CLEANING,3008,0
3,103520,GROCERY I,1028,0
4,103665,BREAD/BAKERY,2712,1


In [14]:
#store features
store_features = pd.read_csv('store_features_year.csv')
pd.DataFrame.head(store_features)

Unnamed: 0.1,Unnamed: 0,store_nbr,year,no_items,transactions,is_weekday,tran_on_wknd,lat,long
0,0,1,2014,3144.0,422777.0,0,0.165558,-0.173986,-78.469864
1,1,1,2015,2058.0,558606.0,0,0.169918,-0.173986,-78.469864
2,2,1,2016,4148.0,544262.0,0,0.173407,-0.173986,-78.469864
3,3,1,2017,2295.0,339992.0,0,0.172886,-0.173986,-78.469864
4,4,10,2014,2334.0,290499.0,0,0.308297,-0.173986,-78.469864


## Data Transformation

We need to get the data into a format that we can work with for an optimization algorithm. 

### 1) Objective function

Our objective is to maximize sales by adding in products in the classes that have a high sales_per_unit.

We currently have forecasts of sales per unit as store-class-date for a month. We need to sum these up so that we can get the sales per unit for a store-class at a month level. 

In [9]:
group=fm.groupby(['store_nbr','class'],as_index=False)
fm_agg=pd.DataFrame(group['RT4_predictions'].agg('sum'))
pd.DataFrame.head(fm_agg)
fm_agg.columns=['store_nbr', 'class', 'sales_per_unit_m']
pd.DataFrame.head(fm_agg)

Unnamed: 0,store_nbr,class,sales_per_unit_m
0,1,1002,62.648735
1,1,1003,90.686116
2,1,1004,139.927814
3,1,1006,33.001366
4,1,1008,61.744868


In [10]:
group=cm.groupby(['store_nbr','class'],as_index=False)
cm_agg=pd.DataFrame(group['sales_per_unit'].agg('sum'))
cm_agg.columns=['store_nbr', 'class', 'sales_per_unit_m']
pd.DataFrame.head(cm_agg)

Unnamed: 0,store_nbr,class,sales_per_unit_m
0,1,1002,70.038933
1,1,1003,66.0
2,1,1004,159.848417
3,1,1006,56.5
4,1,1008,72.708333


We want the optimization algorithm to tell us how many unique products we should have in each class for each store so that we maximize sales. 

In our case, the coefficients of our objective function will be sales_per_unit_m.

### 2) Our constraints

Out first contraint is how many unique products we have in each class. We will calculate these from the 'items' file. 

In [13]:
class_items=pd.DataFrame(items['class'].value_counts())
class_items.columns=['max_range']
class_items['class']=class_items.index
pd.DataFrame.head(class_items)

Unnamed: 0,max_range,class
1016,133,1016
1040,110,1040
1124,100,1124
1034,98,1034
1122,81,1122


Our second contraint is how many unique products we can have in a store. We will use the current year's max from the store features table built at the feature building stage. 

In [24]:
# if our forecast month is January or February, we will take the previous year's values. 
m=fm['month'].unique()
if m in [1,2]:
    cyear=fm['year'].unique()-1
else:
    cyear=fm['year'].unique()
print(cyear)

[2017]


In [30]:
store_items=store_features[store_features.year == 2017]
pd.DataFrame.head(store_items)

Unnamed: 0.1,Unnamed: 0,store_nbr,year,no_items,transactions,is_weekday,tran_on_wknd,lat,long
3,3,1,2017,2295.0,339992.0,0,0.172886,-0.173986,-78.469864
7,7,10,2017,1610.0,195798.0,0,0.311428,-0.173986,-78.469864
11,11,11,2017,2354.0,525441.0,0,0.313381,0.039332,-78.14587
15,15,12,2017,1752.0,270026.0,0,0.300441,-0.932321,-78.617025
19,19,13,2017,1656.0,200155.0,0,0.320557,-0.932321,-78.617025


In [34]:
store_items=store_items[['store_nbr','no_items']]
pd.DataFrame.head(store_items)

Unnamed: 0,store_nbr,no_items
3,1,2295.0
7,10,1610.0
11,11,2354.0
15,12,1752.0
19,13,1656.0


## Optimization - LP

LP optimization algorithms are typically used in allocation problems, therefore I am going to use it too in a loop for each store. But let's see how it works for a store first.

We need to make sure that all the dataframes follow the same order

In [56]:
fm_agg = fm_agg.sort_values(by=["store_nbr","class"])
pd.DataFrame.head(fm_agg)

Unnamed: 0,store_nbr,class,sales_per_unit_m
0,1,1002,62.648735
1,1,1003,90.686116
2,1,1004,139.927814
3,1,1006,33.001366
4,1,1008,61.744868


In [57]:
class_items = class_items.sort_values(by=["class"])
pd.DataFrame.head(class_items)

Unnamed: 0,max_range,class
1002,31,1002
1003,4,1003
1004,39,1004
1005,1,1005
1006,12,1006


In [58]:
stores = np.unique(store_items['store_nbr'])
print(stores)

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
 51 52 53 54]


In [149]:
range(len(stores))
#set the store
s=0

#filter the data
fm_agg_s=fm_agg[fm_agg.store_nbr==stores[s]]
store_max_range=store_items[store_items.store_nbr==stores[s]].no_items
decision_variables_no=len(np.array(fm_agg_s.sales_per_unit_m))
class_items_s=class_items[class_items['class'].isin(fm_agg_s['class'])]
pd.DataFrame.head(class_items_s)
class_items_s


Unnamed: 0,max_range,class,min_range
1002,31,1002,0
1003,4,1003,0
1004,39,1004,0
1006,12,1006,0
1008,12,1008,0


Create the LP Object

In [152]:
# create the LP object, set up as a maximization problem
prob = pulp.LpProblem('Macrospace', pulp.LpMaximize)

Create up the decision variables

In [153]:
# the number of items ranged for each classs
dec_var_dict={}
for c in range(len(np.array(fm_agg_s.sales_per_unit_m))):
    v='x_'+str(c)
    dec_var_dict[v]=pulp.LpVariable(v, lowBound=np.array(class_items_s.min_range)[c],upBound=np.array(class_items_s.max_range)[c], cat='Integer')
print(dec_var_dict)

{'x_0': x_0, 'x_1': x_1, 'x_2': x_2, 'x_3': x_3, 'x_4': x_4, 'x_5': x_5, 'x_6': x_6, 'x_7': x_7, 'x_8': x_8, 'x_9': x_9, 'x_10': x_10, 'x_11': x_11, 'x_12': x_12, 'x_13': x_13, 'x_14': x_14, 'x_15': x_15, 'x_16': x_16, 'x_17': x_17, 'x_18': x_18, 'x_19': x_19, 'x_20': x_20, 'x_21': x_21, 'x_22': x_22, 'x_23': x_23, 'x_24': x_24, 'x_25': x_25, 'x_26': x_26, 'x_27': x_27, 'x_28': x_28, 'x_29': x_29, 'x_30': x_30, 'x_31': x_31, 'x_32': x_32, 'x_33': x_33, 'x_34': x_34, 'x_35': x_35, 'x_36': x_36, 'x_37': x_37, 'x_38': x_38, 'x_39': x_39, 'x_40': x_40, 'x_41': x_41, 'x_42': x_42, 'x_43': x_43, 'x_44': x_44, 'x_45': x_45, 'x_46': x_46, 'x_47': x_47, 'x_48': x_48, 'x_49': x_49, 'x_50': x_50, 'x_51': x_51, 'x_52': x_52, 'x_53': x_53, 'x_54': x_54, 'x_55': x_55, 'x_56': x_56, 'x_57': x_57, 'x_58': x_58, 'x_59': x_59, 'x_60': x_60, 'x_61': x_61, 'x_62': x_62, 'x_63': x_63, 'x_64': x_64, 'x_65': x_65, 'x_66': x_66, 'x_67': x_67, 'x_68': x_68, 'x_69': x_69, 'x_70': x_70, 'x_71': x_71, 'x_72': x_7

Add the objective function

In [154]:
# maximize sales
sales=0.0
for c in range(len(np.array(fm_agg_s.sales_per_unit_m))):
    sales=sales+dec_var_dict['x_'+str(c)]*np.array(fm_agg_s.sales_per_unit_m)[c]
prob += sales  # here's where we actually add it to the obj function

Add the constraints

In [155]:
# maximum items ranged in a store
store_range=0.0
for c in range(len(np.array(fm_agg_s.sales_per_unit_m))):
    store_range=store_range + dec_var_dict['x_'+str(c)]
prob += (store_range <= store_max_range)

Vizualize the problem

In [156]:
print(prob)

Macrospace:
MAXIMIZE
62.648734773*x_0 + 90.6861164597*x_1 + 262.348861548*x_10 + 315.300916988*x_100 + 777.461451038*x_101 + 715.645318629*x_102 + 251.779208332*x_103 + 267.543826142*x_104 + 256.464596532*x_105 + 1788.41858633*x_106 + 41.5288005313*x_107 + 185.215205974*x_108 + 401.300211861*x_109 + 58.1095415088*x_11 + 357.320481821*x_110 + 274.711560149*x_111 + 344.489074012*x_112 + 124.986427057*x_113 + 95.7693904523*x_114 + 228.852623453*x_115 + 53.8013565408*x_116 + 82.1017164232*x_117 + 156.829290119*x_118 + 204.992837498*x_119 + 11.4519923302*x_12 + 56.0979885321*x_120 + 190.075407422*x_121 + 51.4140885514*x_122 + 134.835960083*x_123 + 138.282186945*x_124 + 195.428472659*x_125 + 115.753921761*x_126 + 90.3116551987*x_127 + 52.7422670018*x_128 + 100.4856024*x_129 + 68.5888520946*x_13 + 53.512072691*x_130 + 109.823665059*x_131 + 31.097170352*x_132 + 89.0248996367*x_133 + 120.794092989*x_134 + 112.970116885*x_135 + 335.910877421*x_136 + 38.2620023608*x_137 + 68.9595440876*x_138 + 58

Solve the LP

In [157]:
# solve the LP using the default solver
optimization_result = prob.solve()

# make sure we got an optimal solution
assert optimization_result == pulp.LpStatusOptimal

In [158]:
for v in prob.variables():
    c_name=pd.to_numeric(v.name[2:])
    print(c_name, "=", v.varValue)

0 = 0.0
1 = 0.0
10 = 9.0
100 = 8.0
101 = 15.0
102 = 14.0
103 = 27.0
104 = 31.0
105 = 14.0
106 = 8.0
107 = 0.0
108 = 17.0
109 = 33.0
11 = 0.0
110 = 9.0
111 = 19.0
112 = 7.0
113 = 1.0
114 = 2.0
115 = 9.0
116 = 0.0
117 = 0.0
118 = 8.0
119 = 11.0
12 = 0.0
120 = 0.0
121 = 30.0
122 = 0.0
123 = 25.0
124 = 10.0
125 = 10.0
126 = 9.0
127 = 0.0
128 = 0.0
129 = 16.0
13 = 0.0
130 = 0.0
131 = 18.0
132 = 0.0
133 = 0.0
134 = 18.0
135 = 20.0
136 = 5.0
137 = 0.0
138 = 0.0
139 = 0.0
14 = 0.0
140 = 0.0
141 = 6.0
142 = 0.0
143 = 11.0
144 = 11.0
145 = 0.0
146 = 4.0
147 = 59.0
148 = 9.0
149 = 13.0
15 = 48.0
150 = 2.0
151 = 0.0
152 = 17.0
153 = 36.0
154 = 36.0
155 = 0.0
156 = 3.0
157 = 0.0
158 = 16.0
159 = 0.0
16 = 0.0
160 = 0.0
161 = 0.0
162 = 0.0
163 = 0.0
164 = 4.0
165 = 4.0
166 = 0.0
167 = 0.0
168 = 0.0
169 = 0.0
17 = 0.0
170 = 14.0
171 = 0.0
172 = 24.0
173 = 0.0
174 = 9.0
175 = 0.0
176 = 22.0
177 = 0.0
178 = 4.0
179 = 9.0
18 = 0.0
180 = 4.0
181 = 2.0
182 = 1.0
183 = 2.0
184 = 2.0
185 = 0.0
186 = 2.0
187 

In [159]:
#store the results in a data frame
store_class_opt_res=fm_agg_s
store_class_opt_res['c']=range(len(np.array(fm_agg_s.sales_per_unit_m)))
store_class_opt_res['op_rez']=0.0
pd.DataFrame.head(store_class_opt_res)
for v in prob.variables():
    c_name=pd.to_numeric(v.name[2:])
    store_class_opt_res.iloc[c_name,4]=v.varValue
pd.DataFrame.head(store_class_opt_res)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


Unnamed: 0,store_nbr,class,sales_per_unit_m,c,op_rez
0,1,1002,62.648735,0,0.0
1,1,1003,90.686116,1,0.0
2,1,1004,139.927814,2,39.0
3,1,1006,33.001366,3,0.0
4,1,1008,61.744868,4,0.0


In [162]:
store_class_opt_res=pd.merge(store_class_opt_res,class_items,on='class',how='left')
s=1
if s==0:
    master_store_class_opt_res=store_class_opt_res
else: 
    master_store_class_opt_res=pd.concat([master_store_class_opt_res,store_class_opt_res])
# save the results
store_class_opt_res.to_csv('store_class_opt_res'+ str((store_class_opt_res.store_nbr).unique()) +'.csv',sep=',')
master_store_class_opt_res.to_csv('master_store_class_opt_res.csv',sep=',')

### Write the LP model in a loop over stores

In [164]:
for s in range(len(stores)):
    #filter the data
    fm_agg_s=fm_agg[fm_agg.store_nbr==stores[s]]
    store_max_range=store_items[store_items.store_nbr==stores[s]].no_items
    decision_variables_no=len(np.array(fm_agg_s.sales_per_unit_m))
    class_items_s=class_items[class_items['class'].isin(fm_agg_s['class'])]
    
    # create the LP object, set up as a maximization problem
    prob = pulp.LpProblem('Macrospace', pulp.LpMaximize)
    
    # decision variables: the number of items ranged for each classs
    dec_var_dict={}
    for c in range(len(np.array(fm_agg_s.sales_per_unit_m))):
        v='x_'+str(c)
        dec_var_dict[v]=pulp.LpVariable(v, lowBound=np.array(class_items_s.min_range)[c],upBound=np.array(class_items_s.max_range)[c], cat='Integer')
    
    # objective_function:maximize sales
    sales=0.0
    for c in range(len(np.array(fm_agg_s.sales_per_unit_m))):
        sales=sales+dec_var_dict['x_'+str(c)]*np.array(fm_agg_s.sales_per_unit_m)[c]
    prob += sales  # here's where we actually add it to the obj function
    
    # constraints:maximum items ranged in a store
    store_range=0.0
    for c in range(len(np.array(fm_agg_s.sales_per_unit_m))):
        store_range=store_range + dec_var_dict['x_'+str(c)]
    prob += (store_range <= store_max_range)
    
    # solve the LP using the default solver
    optimization_result = prob.solve()

    # make sure we got an optimal solution
    assert optimization_result == pulp.LpStatusOptimal
    
    #store the results in a data frame
    store_class_opt_res=fm_agg_s
    store_class_opt_res['c']=range(len(np.array(fm_agg_s.sales_per_unit_m)))
    store_class_opt_res['op_rez']=0.0
    pd.DataFrame.head(store_class_opt_res)
    for v in prob.variables():
        c_name=pd.to_numeric(v.name[2:])
        store_class_opt_res.iloc[c_name,4]=v.varValue
    pd.DataFrame.head(store_class_opt_res)
    
    #add the max range for class for checking
    store_class_opt_res=pd.merge(store_class_opt_res,class_items,on='class',how='left')
    
    #create a master df
    if s==0:
        master_store_class_opt_res=store_class_opt_res
    else: 
        master_store_class_opt_res=pd.concat([master_store_class_opt_res,store_class_opt_res])
        
    #write to csv
    store_class_opt_res.to_csv('store_class_opt_res'+ str((store_class_opt_res.store_nbr).unique()) +'.csv',sep=',')
    master_store_class_opt_res.to_csv('master_store_class_opt_res.csv',sep=',')
    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


## Calculate how many products were ranged for May 2017

In [165]:
dtypes={'id': np.int64,'date':np.str,'store_nbr':np.int64,'item_nbr':np.int64,'unit_sales': np.float64,'onpromotion':np.float64}
parse_dates=['date']
train = pd.read_csv('train.csv', dtype=dtypes,parse_dates=parse_dates) # opens the csv file
print("Rows and columns:",train.shape)
pd.DataFrame.head(train)

Rows and columns: (125497040, 6)


Unnamed: 0,id,date,store_nbr,item_nbr,unit_sales,onpromotion
0,0,2013-01-01,25,103665,7.0,
1,1,2013-01-01,25,105574,1.0,
2,2,2013-01-01,25,105575,2.0,
3,3,2013-01-01,25,108079,1.0,
4,4,2013-01-01,25,108701,1.0,


In [166]:
dtypes={'item_nbr': np.int64,'family':np.str,'class':np.int64,'perishable':np.int64}
items = pd.read_csv('items.csv',dtype=dtypes) # opens the csv file
print("Rows and columns:",items.shape)
pd.DataFrame.head(items)

Rows and columns: (4100, 4)


Unnamed: 0,item_nbr,family,class,perishable
0,96995,GROCERY I,1093,0
1,99197,GROCERY I,1067,0
2,103501,CLEANING,3008,0
3,103520,GROCERY I,1028,0
4,103665,BREAD/BAKERY,2712,1


In [169]:
train['month_year']=100*train['date'].dt.year+train['date'].dt.month

In [170]:
train_may=train[train['month_year']==201705]
print("Rows and columns:",train_may.shape)
pd.DataFrame.head(train_may)

Rows and columns: (3301627, 7)


Unnamed: 0,id,date,store_nbr,item_nbr,unit_sales,onpromotion,month_year
114176250,114176250,2017-05-01,1,103520,1.0,0.0,201705
114176251,114176251,2017-05-01,1,105574,4.0,0.0,201705
114176252,114176252,2017-05-01,1,105575,4.0,0.0,201705
114176253,114176253,2017-05-01,1,106716,1.0,0.0,201705
114176254,114176254,2017-05-01,1,108696,4.0,0.0,201705


In [171]:
merged_may=pd.merge(train_may,items,on='item_nbr', how='left')
print("Rows and columns:",merged_may.shape)
pd.DataFrame.head(merged_may)

Rows and columns: (3301627, 10)


Unnamed: 0,id,date,store_nbr,item_nbr,unit_sales,onpromotion,month_year,family,class,perishable
0,114176250,2017-05-01,1,103520,1.0,0.0,201705,GROCERY I,1028,0
1,114176251,2017-05-01,1,105574,4.0,0.0,201705,GROCERY I,1045,0
2,114176252,2017-05-01,1,105575,4.0,0.0,201705,GROCERY I,1045,0
3,114176253,2017-05-01,1,106716,1.0,0.0,201705,GROCERY I,1032,0
4,114176254,2017-05-01,1,108696,4.0,0.0,201705,DELI,2636,1


In [199]:
group=merged_may.groupby(['store_nbr','class'])
may_store_class_items=pd.DataFrame(group['item_nbr'].agg('nunique'))
may_store_class_items['store_nbr']=may_store_class_items.index.get_level_values('store_nbr')
may_store_class_items['class']=may_store_class_items.index.get_level_values('class')
may_store_class_items.columns=['ranged_items','store_nbr','class']
pd.DataFrame.head(may_store_class_items)

Unnamed: 0_level_0,Unnamed: 1_level_0,ranged_items,store_nbr,class
store_nbr,class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1002,25,1,1002
1,1003,2,1,1003
1,1004,30,1,1004
1,1006,3,1,1006
1,1008,10,1,1008


In [200]:
may_store_class_items.to_csv('may_store_class_items.csv',sep=',',index=False)