In [2]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.preprocessing import PolynomialFeatures

In [7]:
# load data
df = pd.read_csv('Global_food_prices_EDA.csv')
country = df['country_name']

# filter country for ukraine
ukraine = df[df['country_name'] == 'Ukraine']

# save ukraine data to csv
ukraine.to_csv('ukraine.csv', index=False)


In [3]:
# read ukraine csv
ukraine = pd.read_csv('ukraine.csv', index_col=0)
ukraine

Unnamed: 0_level_0,country_name,locality_id,market_id,market_name,commdity_id,commodity_name,currency_id,currency_name,market_type_id,market_type,measure_id,measure_name,month,year,price_paid,mp_commoditysource
country_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
254,Ukraine,0,1295,National Average,52,Rice,93,UAH,15,Retail,5,KG,3,2014,8.6567,Government
254,Ukraine,0,1295,National Average,52,Rice,93,UAH,15,Retail,5,KG,4,2014,9.4033,Government
254,Ukraine,0,1295,National Average,52,Rice,93,UAH,15,Retail,5,KG,5,2014,9.8967,Government
254,Ukraine,0,1295,National Average,52,Rice,93,UAH,15,Retail,5,KG,6,2014,10.2567,Government
254,Ukraine,0,1295,National Average,52,Rice,93,UAH,15,Retail,5,KG,7,2014,10.2933,Government
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
254,Ukraine,3159,1884,Kiev,376,Fat (salo),93,UAH,15,Retail,5,KG,2,2017,39.8700,Government
254,Ukraine,3159,1884,Kiev,376,Fat (salo),93,UAH,15,Retail,5,KG,3,2017,39.8700,Government
254,Ukraine,3159,1884,Kiev,376,Fat (salo),93,UAH,15,Retail,5,KG,4,2017,39.8700,Government
254,Ukraine,3159,1884,Kiev,376,Fat (salo),93,UAH,15,Retail,5,KG,5,2017,39.8700,Government


In [4]:
# print the 2 available market_types
print(ukraine['market_type'].unique())

# group by market_name for commodity:rice and get the average price_paid with the year
ukraine_rice = ukraine[ukraine['commodity_name'] == 'Rice']
ukraine_rice = ukraine_rice.groupby(['market_id','market_name','year'])['price_paid'].mean()
# ukraine_rice

# replace market_names with numbers for logistic regression
ukraine_rice = ukraine_rice.reset_index()
# ukraine_rice['market_name'] = ukraine_rice['market_name'].replace(['Cherkasy', 'Chernihiv', 'Chernivtsi', 'Dnipropetrovsk', 'Donetska', 'Herson', 'Ivano-Frankivsk', 'Kharkivka', 'Khmelnytsky', 'Kiev', 'Kirovograd', 'Luhanska', 'Lviv', 'Mykolaiv', 'National Average', 'Odessa', 'Poltava', 'Rivne', 'Sums', 'Ternopil', 'Vinnitsa', 'Volyn', 'Zakarpattya', 'Zaporizhia', 'Zhytomyr', 'm. Kyiv'], [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, 16, 17, 18, 19, 20, 21, 22, 23, 24,25,26])
ukraine_rice 


['Retail']


Unnamed: 0,market_id,market_name,year,price_paid
0,1295,National Average,2014,11.158670
1,1295,National Average,2015,19.225558
2,1295,National Average,2016,15.993892
3,1295,National Average,2017,17.240000
4,1858,Cherkasy,2014,11.532340
...,...,...,...,...
99,1882,Zhytomyr,2017,17.019433
100,1884,Kiev,2014,11.854330
101,1884,Kiev,2015,20.628608
102,1884,Kiev,2016,15.796383


#### Statistical Model - multinomial logistic regression

In [5]:
# create multinomial logistic regression
x = ukraine_rice[['market_id','year']]
y = ukraine_rice['price_paid']

# add constant
x = sm.add_constant(x)

# fit model OLS
model = sm.OLS(y,x)
result = model.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:             price_paid   R-squared:                       0.316
Model:                            OLS   Adj. R-squared:                  0.302
Method:                 Least Squares   F-statistic:                     23.29
Date:                Mon, 22 Jan 2024   Prob (F-statistic):           4.83e-09
Time:                        18:57:59   Log-Likelihood:                -244.11
No. Observations:                 104   AIC:                             494.2
Df Residuals:                     101   BIC:                             502.2
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const      -3081.2237    453.885     -6.789      0.0

#### From the above model we can see that the R-square value is quite low suggesting that this is not a strong model. the p-value for market_id is very

In [7]:
#dropped market_id 
x = ukraine_rice[['year']]
y = ukraine_rice['price_paid']

# add constant
x = sm.add_constant(x)

# fit model OLS
model = sm.OLS(y,x)
result = model.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:             price_paid   R-squared:                       0.316
Model:                            OLS   Adj. R-squared:                  0.309
Method:                 Least Squares   F-statistic:                     47.03
Date:                Mon, 22 Jan 2024   Prob (F-statistic):           5.50e-10
Time:                        18:59:02   Log-Likelihood:                -244.11
No. Observations:                 104   AIC:                             492.2
Df Residuals:                     102   BIC:                             497.5
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const      -3081.3874    451.638     -6.823      0.0

#### From the above model we see that the R-squared value is the same suggesting that there was not a big change from the previous model hwever the Adj.R-squared has increased by a small amou t suggesting this is a better model and that price is dependent on the year. it suggests that for every increase in year 

In [6]:
# # fit model MNLogit
# x = ukraine_rice[['market_name','year']]
# y = ukraine_rice['price_paid']
# model = sm.MNLogit(y,x)
# result = model.fit()
# print(result.summary())