In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import mean_squared_error 
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score


In [4]:
# Load the month-wise market arrivals CSV into a DataFrame.
# NOTE(review): the absolute /content path looks like a Colab session path - adjust when running elsewhere.
data = pd.read_csv(filepath_or_buffer="/content/MonthWiseMarketArrivals_Clean.csv")

In [5]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
data.shape

(10227, 10)

In [6]:
# Preview the first five rows of the raw frame.
data.head(n=5)

Unnamed: 0,market,month,year,quantity,priceMin,priceMax,priceMod,state,city,date
0,ABOHAR(PB),January,2005,2350,404,493,446,PB,ABOHAR,January-2005
1,ABOHAR(PB),January,2006,900,487,638,563,PB,ABOHAR,January-2006
2,ABOHAR(PB),January,2010,790,1283,1592,1460,PB,ABOHAR,January-2010
3,ABOHAR(PB),January,2011,245,3067,3750,3433,PB,ABOHAR,January-2011
4,ABOHAR(PB),January,2012,1035,523,686,605,PB,ABOHAR,January-2012


In [7]:
# Replace the textual `date` column (e.g. "January-2005") with parsed timestamps.
data["date"] = pd.DatetimeIndex(data["date"])

In [8]:
# Display the frame to check that `date` now holds parsed timestamps.
data


Unnamed: 0,market,month,year,quantity,priceMin,priceMax,priceMod,state,city,date
0,ABOHAR(PB),January,2005,2350,404,493,446,PB,ABOHAR,2005-01-01
1,ABOHAR(PB),January,2006,900,487,638,563,PB,ABOHAR,2006-01-01
2,ABOHAR(PB),January,2010,790,1283,1592,1460,PB,ABOHAR,2010-01-01
3,ABOHAR(PB),January,2011,245,3067,3750,3433,PB,ABOHAR,2011-01-01
4,ABOHAR(PB),January,2012,1035,523,686,605,PB,ABOHAR,2012-01-01
...,...,...,...,...,...,...,...,...,...,...
10222,YEOLA(MS),December,2011,131326,282,612,526,MS,YEOLA,2011-12-01
10223,YEOLA(MS),December,2012,207066,485,1327,1136,MS,YEOLA,2012-12-01
10224,YEOLA(MS),December,2013,215883,472,1427,1177,MS,YEOLA,2013-12-01
10225,YEOLA(MS),December,2014,201077,446,1654,1456,MS,YEOLA,2014-12-01


In [9]:
# Count missing values in each column (`isna` performs the same check as `isnull`).
data.isna().sum()

market      0
month       0
year        0
quantity    0
priceMin    0
priceMax    0
priceMod    0
state       0
city        0
date        0
dtype: int64

In [10]:
# Re-display the frame; the null check in the previous cell did not modify it.
data

Unnamed: 0,market,month,year,quantity,priceMin,priceMax,priceMod,state,city,date
0,ABOHAR(PB),January,2005,2350,404,493,446,PB,ABOHAR,2005-01-01
1,ABOHAR(PB),January,2006,900,487,638,563,PB,ABOHAR,2006-01-01
2,ABOHAR(PB),January,2010,790,1283,1592,1460,PB,ABOHAR,2010-01-01
3,ABOHAR(PB),January,2011,245,3067,3750,3433,PB,ABOHAR,2011-01-01
4,ABOHAR(PB),January,2012,1035,523,686,605,PB,ABOHAR,2012-01-01
...,...,...,...,...,...,...,...,...,...,...
10222,YEOLA(MS),December,2011,131326,282,612,526,MS,YEOLA,2011-12-01
10223,YEOLA(MS),December,2012,207066,485,1327,1136,MS,YEOLA,2012-12-01
10224,YEOLA(MS),December,2013,215883,472,1427,1177,MS,YEOLA,2013-12-01
10225,YEOLA(MS),December,2014,201077,446,1654,1456,MS,YEOLA,2014-12-01


In [11]:
# Count rows for every (state, market) pair; as_index=False keeps both keys as regular columns.
data_state = data.groupby(by=["state", "market"], as_index=False).count()
# Distinct market names, in the order they appear in the grouped frame.
data_state["market"].unique()

array(['HYDERABAD', 'KURNOOL(AP)', 'RAJAHMUNDRY(AP)', 'GUWAHATI',
       'BIHARSHARIF(BHR)', 'PATNA', 'CHANDIGARH', 'DELHI',
       'AHMEDABAD(GUJ)', 'BHAVNAGAR(GUJ)', 'DEESA(GUJ)', 'GONDAL(GUJ)',
       'JAMNAGAR(GUJ)', 'MAHUVA(GUJ)', 'RAJKOT(GUJ)', 'SURAT(GUJ)',
       'SHIMLA', 'KARNAL(HR)', 'RAIPUR(CHGARH)', 'RANCHI(JH)', 'JAMMU',
       'SRINAGAR', 'TRIVENDRUM', 'PALAYAM(KER)', 'BANGALORE',
       'BELGAUM(KNT)', 'BIJAPUR(KNT)', 'CHALLAKERE(KNT)',
       'CHICKBALLAPUR(KNT)', 'COIMBATORE(TN) (bellary)',
       'DHAVANGERE(KNT)', 'HASSAN(KNT)', 'HUBLI(KNT)', 'KOLAR(KNT)',
       'RAICHUR(KNT)', 'BHOPAL', 'DEWAS(MP)', 'INDORE(MP)',
       'MANDSOUR(MP)', 'NEEMUCH(MP)', 'SAGAR(MP)', 'UJJAIN(MP)',
       'AHMEDNAGAR(MS)', 'BOMBORI(MS)', 'CHAKAN(MS)', 'CHANDVAD(MS)',
       'DEVALA(MS)', 'DHULIA(MS)', 'DINDORI(MS)', 'JALGAON(MS)',
       'JALGAON(WHITE)', 'JUNNAR(MS)', 'KALVAN(MS)', 'KOLHAPUR(MS)',
       'KOPERGAON(MS)', 'LASALGAON(MS)', 'LONAND(MS)', 'MALEGAON(MS)',
       'MANMAD(MS

In [12]:
# Index the rows by the monthly period of their `date` value.
data.index = pd.PeriodIndex(data["date"], freq="M")
# Preview the first five rows with the new period index.
data.head(n=5)

Unnamed: 0_level_0,market,month,year,quantity,priceMin,priceMax,priceMod,state,city,date
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2005-01,ABOHAR(PB),January,2005,2350,404,493,446,PB,ABOHAR,2005-01-01
2006-01,ABOHAR(PB),January,2006,900,487,638,563,PB,ABOHAR,2006-01-01
2010-01,ABOHAR(PB),January,2010,790,1283,1592,1460,PB,ABOHAR,2010-01-01
2011-01,ABOHAR(PB),January,2011,245,3067,3750,3433,PB,ABOHAR,2011-01-01
2012-01,ABOHAR(PB),January,2012,1035,523,686,605,PB,ABOHAR,2012-01-01


In [13]:
# Distinct city names present in the dataset.
data["city"].unique()

array(['ABOHAR', 'AGRA', 'AHMEDABAD', 'AHMEDNAGAR', 'AJMER', 'ALIGARH',
       'ALWAR', 'AMRITSAR', 'BALLIA', 'BANGALORE', 'BAREILLY', 'BELGAUM',
       'BHATINDA', 'BHAVNAGAR', 'BHOPAL', 'BHUBNESWER', 'BIHARSHARIF',
       'BIJAPUR', 'BIKANER', 'BOMBORI', 'BURDWAN', 'CHAKAN', 'CHALLAKERE',
       'CHANDIGARH', 'CHANDVAD', 'CHENNAI', 'CHICKBALLAPUR', 'COIMBATORE',
       'DEESA', 'DEHRADOON', 'DELHI', 'DEORIA', 'DEVALA', 'DEWAS',
       'DHAVANGERE', 'DHULIA', 'DINDIGUL', 'DINDORI', 'ETAWAH', 'GONDAL',
       'GORAKHPUR', 'GUWAHATI', 'HALDWANI', 'HASSAN', 'HOSHIARPUR',
       'HUBLI', 'HYDERABAD', 'INDORE', 'JAIPUR', 'JALANDHAR', 'JALGAON',
       'JAMMU', 'JAMNAGAR', 'JODHPUR', 'JUNNAR', 'KALVAN', 'KANPUR',
       'KARNAL', 'KHANNA', 'KOLAR', 'KOLHAPUR', 'KOLKATA', 'KOPERGAON',
       'KOTA', 'KURNOOL', 'LASALGAON', 'LONAND', 'LUCKNOW', 'LUDHIANA',
       'MADURAI', 'MAHUVA', 'MALEGAON', 'MANDSOUR', 'MANMAD', 'MEERUT',
       'MIDNAPUR', 'MUMBAI', 'NAGPUR', 'NANDGAON', 'NASIK', 'NEEMU

In [14]:
# Question 1: month-wise priceMod for MUMBAI

In [15]:
# Question 1 only needs month, year, priceMod, city and date, so the remaining
# columns are removed from `data` (later cells display this reduced frame).
#
# `DataFrame.drop(..., inplace=True)` returns None, so its result must never be
# assigned: the previous version of this cell left `data_MUMBAI` set to None.
# Dropping all columns in one call and rebinding `data` avoids that, and
# `errors="ignore"` keeps the cell re-runnable once the columns are gone.
data = data.drop(
    columns=["market", "quantity", "priceMin", "priceMax", "state"],
    errors="ignore",
)

# Independent copy of the MUMBAI rows, taken from the column-reduced frame.
data_MUMBAI = data.loc[data["city"] == "MUMBAI"].copy()

data_MUMBAI

In [16]:
# Select the MUMBAI rows with a boolean mask and keep an independent copy of them.
data_MUMBAI = data[data["city"].eq("MUMBAI")].copy()
data_MUMBAI

Unnamed: 0_level_0,month,year,priceMod,city,date
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2004-01,January,2004,849,MUMBAI,2004-01-01
2005-01,January,2005,387,MUMBAI,2005-01-01
2006-01,January,2006,402,MUMBAI,2006-01-01
2007-01,January,2007,997,MUMBAI,2007-01-01
2008-01,January,2008,448,MUMBAI,2008-01-01
...,...,...,...,...,...
2011-12,December,2011,749,MUMBAI,2011-12-01
2012-12,December,2012,1330,MUMBAI,2012-12-01
2013-12,December,2013,1639,MUMBAI,2013-12-01
2014-12,December,2014,1802,MUMBAI,2014-12-01


In [17]:
# Rows where the year is 2015 and the city is MUMBAI.
data2015MUMBAI = data.loc[data["year"].eq(2015) & data["city"].eq("MUMBAI")]

In [18]:
data2015MUMBAI
# Month-wise priceMod rows for MUMBAI in 2015 (selected in the previous cell).

Unnamed: 0_level_0,month,year,priceMod,city,date
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01,January,2015,1443,MUMBAI,2015-01-01
2015-02,February,2015,1513,MUMBAI,2015-02-01
2015-03,March,2015,1435,MUMBAI,2015-03-01
2015-04,April,2015,1327,MUMBAI,2015-04-01
2015-05,May,2015,1572,MUMBAI,2015-05-01
2015-06,June,2015,2015,MUMBAI,2015-06-01
2015-07,July,2015,2248,MUMBAI,2015-07-01
2015-08,August,2015,4361,MUMBAI,2015-08-01
2015-09,September,2015,4714,MUMBAI,2015-09-01
2015-10,October,2015,3748,MUMBAI,2015-10-01


In [23]:
# Seed NumPy's legacy global generator so the five uniform draws are reproducible.
np.random.seed(2014)
# NOTE(review): this rebinds `data`, replacing the market DataFrame used by the earlier cells.
data = pd.DataFrame(data={"A": np.random.rand(5)})

In [24]:
# Display the five seeded random values generated in the previous cell.
data

Unnamed: 0,A
0,0.185581
1,0.301659
2,0.337738
3,0.990038
4,0.396088


In [25]:
# Daily timestamps between the two endpoints (date_range defaults to daily frequency).
date = pd.date_range(start='January-2005', end='January-2014')
# Keep only the days that fall in January and format each one as 'MM-DD'.
date = date[date.month == 1].strftime('%m-%d')

In [26]:
# Number of January day labels kept by the previous cell.
print(date.size)

280
