In [142]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.preprocessing import PolynomialFeatures

In [145]:
# load data
df = pd.read_csv('Global_food_prices_EDA.csv')
country = df['country_name']

# filter country for ukraine
ukraine = df[df['country_name'] == 'Ukraine']

# save ukraine data to csv
ukraine.to_csv('ukraine.csv', index=False)


In [146]:
# read ukraine csv
ukraine = pd.read_csv('ukraine.csv', index_col=0)
ukraine

Unnamed: 0_level_0,country_name,locality_id,market_id,market_name,commdity_id,commodity_name,currency_id,currency_name,market_type_id,market_type,measure_id,measure_name,month,year,price_paid,mp_commoditysource
country_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
254,Ukraine,0,1295,National Average,52,Rice,93,UAH,15,Retail,5,KG,3,2014,8.6567,Government
254,Ukraine,0,1295,National Average,52,Rice,93,UAH,15,Retail,5,KG,4,2014,9.4033,Government
254,Ukraine,0,1295,National Average,52,Rice,93,UAH,15,Retail,5,KG,5,2014,9.8967,Government
254,Ukraine,0,1295,National Average,52,Rice,93,UAH,15,Retail,5,KG,6,2014,10.2567,Government
254,Ukraine,0,1295,National Average,52,Rice,93,UAH,15,Retail,5,KG,7,2014,10.2933,Government
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
254,Ukraine,3159,1884,Kiev,376,Fat (salo),93,UAH,15,Retail,5,KG,2,2017,39.8700,Government
254,Ukraine,3159,1884,Kiev,376,Fat (salo),93,UAH,15,Retail,5,KG,3,2017,39.8700,Government
254,Ukraine,3159,1884,Kiev,376,Fat (salo),93,UAH,15,Retail,5,KG,4,2017,39.8700,Government
254,Ukraine,3159,1884,Kiev,376,Fat (salo),93,UAH,15,Retail,5,KG,5,2017,39.8700,Government


In [174]:
# print the 2 available market_types
print(ukraine['market_type'].unique())

# group by market_name for commodity:rice and get the average price_paid with the year
ukraine_rice = ukraine[ukraine['commodity_name'] == 'Rice']
ukraine_rice = ukraine_rice.groupby(['market_name','year'])['price_paid'].mean()
ukraine_rice

# replace market_names with numbers for logistic regression
ukraine_rice = ukraine_rice.reset_index()
ukraine_rice['market_name'] = ukraine_rice['market_name'].replace(['Cherkasy', 'Chernihiv', 'Chernivtsi', 'Dnipropetrovsk', 'Donetska', 'Herson', 'Ivano-Frankivsk', 'Kharkivka', 'Khmelnytsky', 'Kiev', 'Kirovograd', 'Luhanska', 'Lviv', 'Mykolaiv', 'National Average', 'Odessa', 'Poltava', 'Rivne', 'Sums', 'Ternopil', 'Vinnitsa', 'Volyn', 'Zakarpattya', 'Zaporizhia', 'Zhytomyr', 'm. Kyiv'], [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, 16, 17, 18, 19, 20, 21, 22, 23, 24,25,26])
ukraine_rice 


['Retail']


Unnamed: 0,market_name,year,price_paid
0,1,2014,11.532340
1,1,2015,19.409167
2,1,2016,16.055825
3,1,2017,17.683333
4,2,2014,11.062000
...,...,...,...
99,25,2017,17.019433
100,26,2014,11.489680
101,26,2015,20.367775
102,26,2016,16.025000


#### Statistical Model - multinomial logistic regression

In [182]:
# create multinomial logistic regression
x = ukraine_rice[['market_name','year']]
y = ukraine_rice['price_paid']

# add constant
x = sm.add_constant(x)

# fit model OLS
model = sm.OLS(y,x)
result = model.fit()
print(result.summary())

# fit model MNLogit
model = sm.MNLogit(y,x)
result = model.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:             price_paid   R-squared:                       0.316
Model:                            OLS   Adj. R-squared:                  0.302
Method:                 Least Squares   F-statistic:                     23.29
Date:                Sat, 20 Jan 2024   Prob (F-statistic):           4.82e-09
Time:                        22:11:00   Log-Likelihood:                -244.11
No. Observations:                 104   AIC:                             494.2
Df Residuals:                     101   BIC:                             502.2
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const       -3081.4220    453.856     -6.789      

  eXB = np.column_stack((np.ones(len(X)), np.exp(X)))
  return eXB/eXB.sum(1)[:,None]


Optimization terminated successfully.
         Current function value: nan
         Iterations 6
                          MNLogit Regression Results                          
Dep. Variable:             price_paid   No. Observations:                  104
Model:                        MNLogit   Df Residuals:                     -205
Method:                           MLE   Df Model:                          206
Date:                Sat, 20 Jan 2024   Pseudo R-squ.:                     nan
Time:                        22:11:01   Log-Likelihood:                    nan
converged:                       True   LL-Null:                       -483.02
Covariance Type:            nonrobust   LLR p-value:                       nan
            price_paid=10.412       coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------------------
const                                nan        nan        nan        nan  