In [None]:
#importing packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import glob

In [None]:
# Switch to the folder that holds the CSV files so the relative file names used below resolve.
# NOTE(review): hard-coded absolute Colab path; presumably Google Drive is already mounted at /content/drive — adjust when running elsewhere.
os.chdir('/content/drive/MyDrive/gro_homework')

In [None]:
# Load the monthly production table, index it by the (time-stripped) start date,
# and discard both raw date columns once the index carries the date.
df_prod = (
  pd.read_csv('Production Quantity.csv', parse_dates=['start_date', 'end_date'])
  .pipe(lambda frame: frame.set_index(pd.to_datetime(frame['start_date'].dt.date)))
  .drop(columns=['start_date', 'end_date'])
)

In [None]:
# Check row count, dtypes and null counts of the production table.
df_prod.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 720 entries, 2015-01-01 to 2020-12-01
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   prod       720 non-null    int64
 1   region_id  720 non-null    int64
dtypes: int64(2)
memory usage: 16.9 KB


In [None]:
# Preview the first production rows (one row per month and region).
df_prod.head()

Unnamed: 0_level_0,prod,region_id
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-01,171725,93
2015-02-01,188325,93
2015-03-01,247856,93
2015-04-01,282791,93
2015-05-01,291057,93


#### We do not know exactly how long it takes for the Grople fruit to reach maturity and be harvested, nor how long it takes to manufacture the syrup from the fruit. I am therefore making the following assumptions:

#### It takes 6 months for the fruit to grow and then be harvested.
#### The syrup is manufactured on the same day the fruit is harvested.

#### The first available monthly production quantity is from January 2015. This means that data averaged over the six months from June 2014 up to (but not including) December 2014 will be related to the production quantity in January 2015. Similarly, data from July 2014 up to (but not including) January 2015 will be related to the production quantity in February 2015.

In [None]:
def preprocess(file, start='2014-06-01', end='2020-11-30'):
  """Load a per-region time-series CSV and reduce it to monthly means.

  Parameters
  ----------
  file : str
    Path of a CSV containing `start_date`, `end_date` and `region_id` columns
    plus the measurement column(s).
  start, end : str
    Inclusive bounds applied to `start_date` before aggregating. The defaults
    select June 2014 through November 2020.

  Returns
  -------
  pandas.DataFrame
    One row per region and calendar month, regions stacked in order of first
    appearance in the file. The index is the month-end date (named
    `start_date`); every remaining column, including `region_id`, holds the
    mean over that month. A month without observations inside a region's date
    span yields a row of NaN. An empty DataFrame is returned when no row falls
    inside the window.
  """
  df = pd.read_csv(file, parse_dates=['start_date', 'end_date'])
  # Either date column carries enough information; keep only start_date.
  df = df.drop(columns=['end_date'])

  in_window = (df['start_date'] >= start) & (df['start_date'] <= end)
  # Hours, minutes and seconds are irrelevant for a monthly average.
  df = df.loc[in_window].assign(
    start_date=lambda frame: pd.to_datetime(frame['start_date'].dt.date)
  )

  # Aggregate each region separately so months are binned within that region's own date span.
  monthly_frames = []
  for region in df['region_id'].unique():
    region_rows = df[df['region_id'] == region]
    # An offset object instead of the 'M' alias keeps this working across pandas versions.
    month_end = pd.Grouper(key='start_date', freq=pd.offsets.MonthEnd())
    monthly_frames.append(region_rows.groupby(month_end).mean())

  # pd.concat replaces DataFrame.append (removed in pandas 2.0); it rejects an empty list.
  if not monthly_frames:
    return pd.DataFrame()
  return pd.concat(monthly_frames)

In [None]:
# Read each source file and reduce it to per-region monthly means.
df_precip, df_soil, df_temp, df_ndvi = (
  preprocess(csv_name)
  for csv_name in (
    'Daily Precipitation.csv',
    'Daily Soil Mositure.csv',
    'Daily Temperature.csv',
    'Eight Day NDVI.csv',
  )
)

In [None]:
# Preview the monthly precipitation table (month-end index, mean value, region id).
df_precip.head()

Unnamed: 0_level_0,precip,region_id
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-06-30,4.44349,93.0
2014-07-31,5.095036,93.0
2014-08-31,5.476608,93.0
2014-09-30,5.269886,93.0
2014-10-31,5.571791,93.0


In [None]:
# Monthly rows available per region in the NDVI table.
df_ndvi.region_id.value_counts()

93.0     78
94.0     78
95.0     78
98.0     78
99.0     78
102.0    78
103.0    78
97.0     77
104.0    77
105.0    76
Name: region_id, dtype: int64

In [None]:
# Monthly rows available per region in the precipitation table.
df_precip.region_id.value_counts()

93.0     78
94.0     78
95.0     78
97.0     78
98.0     78
99.0     78
102.0    78
103.0    78
104.0    78
105.0    78
Name: region_id, dtype: int64

In [None]:
# Monthly rows available per region in the temperature table.
df_temp.region_id.value_counts()

93.0     78
94.0     78
95.0     78
97.0     78
98.0     78
99.0     78
102.0    78
103.0    78
104.0    78
105.0    78
Name: region_id, dtype: int64

In [None]:
# Monthly rows available per region in the soil-moisture table.
df_soil.region_id.value_counts()

93.0     78
94.0     78
95.0     78
97.0     78
98.0     78
99.0     78
102.0    78
103.0    78
104.0    78
105.0    78
Name: region_id, dtype: int64

### It is observed that the NDVI dataframe is missing a couple of values. All the other dataframes have the same number of values per region ID.

## Investigating the NDVI dataframe


In [None]:
# Per column: does the NDVI table contain any NaN?
df_ndvi.isna().any()

ndvi         True
region_id    True
dtype: bool

In [None]:
# Drop every row that contains a NaN. Presumably these are the months without any NDVI
# observation (both columns are NaN there) — the missing months are re-inserted further down.
df_ndvi = df_ndvi.dropna()

#### It is observed that the Eight-Day NDVI file has some months without any values. The empty rows were dropped above; below, I will re-insert each missing month using the average NDVI of that region.

In [None]:
# Split the NDVI table into one frame per region id so each region can be repaired on its own.
regions = df_ndvi.region_id.unique()
dfdict = {region: df_ndvi[df_ndvi.region_id == region] for region in regions}

In [None]:
# Region 97 has 77 monthly rows instead of 78 (see the value counts above); inspect it first.
temp = dfdict[97.0]

In [None]:
# Every month-end the monthly tables are expected to contain. The tables start at
# 2014-06-30, so the range starts there too; otherwise a missing June 2014 would go unnoticed.
# A MonthEnd offset is used instead of the 'M' alias so the call works across pandas versions.
my_range = pd.date_range(
  start="2014-06-30", end="2020-11-30", freq=pd.offsets.MonthEnd())

In [None]:
# Month-ends in my_range that have no row in this region's frame.
print(my_range.difference(temp.index))

DatetimeIndex(['2016-11-30'], dtype='datetime64[ns]', freq=None)


In [None]:
# Insert the absent month using the region's mean NDVI, then restore chronological order.
region_mean = temp['ndvi'].mean()
temp.loc[pd.to_datetime('2016-11-30')] = [region_mean, 97.0]
temp = temp.sort_index()

In [None]:
# Confirm the region frame now has the full number of rows and no nulls.
temp.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 78 entries, 2014-06-30 to 2020-11-30
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   ndvi       78 non-null     float64
 1   region_id  78 non-null     float64
dtypes: float64(2)
memory usage: 1.8 KB


In [None]:
# Store the completed, date-sorted frame back under its region key.
dfdict[97.0] = temp

In [None]:
# Region 104 is also one month short; list the month-ends without a row.
temp = dfdict[104.0]
print(my_range.difference(temp.index))

DatetimeIndex(['2016-11-30'], dtype='datetime64[ns]', freq=None)


In [None]:
# Insert the absent month using the region's mean NDVI, then restore chronological order.
region_mean = temp['ndvi'].mean()
temp.loc[pd.to_datetime('2016-11-30')] = [region_mean, 104.0]
temp = temp.sort_index()

In [None]:
# Store the completed, date-sorted frame back under its region key.
dfdict[104.0] = temp

In [None]:
# Region 105 is two months short; list the month-ends without a row.
temp = dfdict[105.0]
print(my_range.difference(temp.index))

DatetimeIndex(['2016-11-30', '2018-01-31'], dtype='datetime64[ns]', freq=None)


In [None]:
# Insert both absent months using the region's mean NDVI (recomputed before each insert),
# then restore chronological order.
for missing_month in ('2016-11-30', '2018-01-31'):
  temp.loc[pd.to_datetime(missing_month)] = [temp['ndvi'].mean(), 105.0]
temp = temp.sort_index()

In [None]:
# Store the completed, date-sorted frame back under its region key.
dfdict[105.0] = temp

In [None]:
# Stack the per-region frames back into one NDVI table, in the dict's insertion order.
# pd.concat replaces the row-by-row DataFrame.append loop (append was removed in pandas 2.0);
# the guard keeps the old "empty in, empty out" behaviour because concat rejects an empty list.
df = pd.concat(list(dfdict.values())) if dfdict else pd.DataFrame()
df_ndvi = df

In [None]:
# Re-check the per-region row counts of the repaired NDVI table.
df_ndvi.region_id.value_counts()

93.0     78
94.0     78
95.0     78
97.0     78
98.0     78
99.0     78
102.0    78
103.0    78
104.0    78
105.0    78
Name: region_id, dtype: int64

In [None]:
# Reference counts from the precipitation table for comparison.
df_precip.region_id.value_counts()

93.0     78
94.0     78
95.0     78
97.0     78
98.0     78
99.0     78
102.0    78
103.0    78
104.0    78
105.0    78
Name: region_id, dtype: int64

#### Now we have filled in all the missing values

## Calculating the average of values over 6 months for each production month

In [None]:
# Six-month means of monthly precipitation, region by region.
# Window k averages monthly rows k .. k+5; a region with n rows contributes n-6 values.
precip = []
for region in df_precip.region_id.unique():
  region_rows = df_precip.loc[df_precip['region_id'] == region]
  window_count = region_rows.shape[0] - 6
  precip.extend(
    region_rows.iloc[first:first + 6]['precip'].mean()
    for first in range(window_count)
  )

In [None]:
# Six-month means of monthly temperature, region by region.
# Window k averages monthly rows k .. k+5; a region with n rows contributes n-6 values.
temperature = []
for region in df_temp.region_id.unique():
  region_rows = df_temp.loc[df_temp['region_id'] == region]
  window_count = region_rows.shape[0] - 6
  temperature.extend(
    region_rows.iloc[first:first + 6]['temp'].mean()
    for first in range(window_count)
  )

In [None]:
# Six-month means of monthly soil moisture, region by region.
# Window k averages monthly rows k .. k+5; a region with n rows contributes n-6 values.
smos = []
for region in df_soil.region_id.unique():
  region_rows = df_soil.loc[df_soil['region_id'] == region]
  window_count = region_rows.shape[0] - 6
  smos.extend(
    region_rows.iloc[first:first + 6]['smos'].mean()
    for first in range(window_count)
  )

In [None]:
# Six-month means of monthly NDVI, region by region.
# Window k averages monthly rows k .. k+5; a region with n rows contributes n-6 values.
ndvi = []
for region in df_ndvi.region_id.unique():
  region_rows = df_ndvi.loc[df_ndvi['region_id'] == region]
  window_count = region_rows.shape[0] - 6
  ndvi.extend(
    region_rows.iloc[first:first + 6]['ndvi'].mean()
    for first in range(window_count)
  )

In [None]:
# Row count of the production table; the feature lists must have exactly this length.
df_prod.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 720 entries, 2015-01-01 to 2020-12-01
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   prod       720 non-null    int64
 1   region_id  720 non-null    int64
dtypes: int64(2)
memory usage: 16.9 KB


## Appending the averaged values to the production quantity dataframe

In [None]:
# Attach the six-month means as feature columns. The lists are assigned by position,
# so this assumes df_prod rows are grouped by region and sorted by date in the same
# region order as the feature tables — TODO confirm.
for column, values in (('precip', precip), ('temp', temperature), ('smos', smos), ('ndvi', ndvi)):
  df_prod[column] = values

In [None]:
# Verify the four feature columns were added without nulls.
df_prod.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 720 entries, 2015-01-01 to 2020-12-01
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   prod       720 non-null    int64  
 1   region_id  720 non-null    int64  
 2   precip     720 non-null    float64
 3   temp       720 non-null    float64
 4   smos       720 non-null    float64
 5   ndvi       720 non-null    float64
dtypes: float64(4), int64(2)
memory usage: 39.4 KB


In [None]:
# Preview production together with its unscaled features.
df_prod.head()

Unnamed: 0_level_0,prod,region_id,precip,temp,smos,ndvi
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-01,171725,93,5.973499,26.327291,0.299227,0.778475
2015-02-01,188325,93,7.276377,26.161226,0.30052,0.769292
2015-03-01,247856,93,7.159613,26.042288,0.302237,0.766905
2015-04-01,282791,93,6.447868,26.297789,0.296906,0.761228
2015-05-01,291057,93,6.145329,26.343127,0.284202,0.755204


## Normalizing the features

In [None]:
# Min-max scale the four feature columns to [0, 1]; prod and region_id are left untouched.
feature_columns = ['precip', 'temp', 'smos', 'ndvi']
df_min_max_scaled = df_prod.copy()
lowest = df_min_max_scaled[feature_columns].min()
highest = df_min_max_scaled[feature_columns].max()
df_min_max_scaled[feature_columns] = (df_min_max_scaled[feature_columns] - lowest) / (highest - lowest)

In [None]:
# Preview the scaled training features.
df_min_max_scaled.head()

Unnamed: 0_level_0,prod,region_id,precip,temp,smos,ndvi
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-01,171725,93,0.257416,0.40682,0.752068,0.443201
2015-02-01,188325,93,0.375302,0.378671,0.756819,0.361563
2015-03-01,247856,93,0.364737,0.35851,0.763127,0.340342
2015-04-01,282791,93,0.300337,0.401819,0.743539,0.289872
2015-05-01,291057,93,0.272963,0.409505,0.696863,0.236317


In [None]:
# Work on a copy so the scaled table above stays unchanged.
final_df = df_min_max_scaled.copy()

In [None]:
# Shuffle the rows. A fixed seed makes the row order — and therefore the train/test
# split and the reported score — identical on every run. (Row order does not change
# what a least-squares fit learns; it only decides which rows end up in which split.)
final_df = final_df.sample(frac=1, random_state=42)
final_df.head()

Unnamed: 0_level_0,prod,region_id,precip,temp,smos,ndvi
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-02-01,24262,95,0.456032,0.517665,0.612498,0.65463
2018-11-01,62102,97,0.146883,0.903364,0.406069,0.707877
2018-04-01,42942,104,0.385913,0.797734,0.865419,0.462834
2019-02-01,43806,104,0.455919,0.885202,0.828312,0.489314
2017-02-01,155267,98,0.390787,0.355608,0.267691,0.608935


In [None]:
# The region ID is a label, not a quantity: store it as a categorical so it can be
# one-hot encoded instead of being treated as an integer feature.
final_df['region_id'] = final_df.region_id.astype('category')

In [None]:
# Features (everything except prod) and target (prod).
x = final_df.drop('prod',axis=1)
y = final_df['prod']

In [None]:
# One-hot encode region_id (first level dropped) and append the dummies to the features.
# NOTE(review): get_dummies is applied to the whole frame, so `regions` also carries
# precip/temp/smos/ndvi; after the concat those four columns appear twice (visible in the
# output below). The prediction features must have the same column layout as these
# training features, so both places have to be changed together.
regions = pd.get_dummies(x,drop_first=True)
x = x.drop('region_id',axis=1)
x = pd.concat([x,regions],axis=1)
x

Unnamed: 0_level_0,precip,temp,smos,ndvi,precip,temp,smos,ndvi,region_id_94,region_id_95,region_id_97,region_id_98,region_id_99,region_id_102,region_id_103,region_id_104,region_id_105
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2019-02-01,0.456032,0.517665,0.612498,0.654630,0.456032,0.517665,0.612498,0.654630,0,1,0,0,0,0,0,0,0
2018-11-01,0.146883,0.903364,0.406069,0.707877,0.146883,0.903364,0.406069,0.707877,0,0,1,0,0,0,0,0,0
2018-04-01,0.385913,0.797734,0.865419,0.462834,0.385913,0.797734,0.865419,0.462834,0,0,0,0,0,0,0,1,0
2019-02-01,0.455919,0.885202,0.828312,0.489314,0.455919,0.885202,0.828312,0.489314,0,0,0,0,0,0,0,1,0
2017-02-01,0.390787,0.355608,0.267691,0.608935,0.390787,0.355608,0.267691,0.608935,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-10-01,0.214316,0.441705,0.482121,0.891926,0.214316,0.441705,0.482121,0.891926,0,0,0,0,0,1,0,0,0
2019-06-01,0.590728,0.332691,0.693032,0.830087,0.590728,0.332691,0.693032,0.830087,0,0,0,0,0,0,1,0,0
2019-10-01,0.249139,0.616653,0.504692,0.586022,0.249139,0.616653,0.504692,0.586022,0,1,0,0,0,0,0,0,0
2015-02-01,0.572268,0.123387,0.656552,0.544184,0.572268,0.123387,0.656552,0.544184,0,0,0,0,1,0,0,0,0


## Splitting and shuffling the dataset

In [None]:
# Hold out 20% of the rows for evaluation; random_state fixes the split for a given row order.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 42)

In [None]:
# Fit a linear regression on the training split and predict the held-out rows.
from sklearn.linear_model import LinearRegression
LR = LinearRegression()
LR.fit(x_train,y_train)
y_prediction =  LR.predict(x_test)

In [None]:
# Coefficient of determination (R2) on the held-out rows.
# mean_squared_error is imported but not used in this cell.
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
score=r2_score(y_test,y_prediction)
print('The R2 score is ',score)

The R2 score is  0.9368438140563538


### The best possible value for the R2 score is 1.0. My model has achieved 0.937. With the previous iteration, I had achieved 0.94. I chalk the small difference up to the randomness of the shuffling.

### To predict the production quantities from January 2021 to December 2021, we will need data from June 2020 until November 2021, as per my assumption made at the beginning of this notebook

In [None]:
def preprocess_test(file, start='2020-06-01', end='2021-11-30'):
  """Load a per-region time-series CSV and reduce it to monthly means for the prediction period.

  Same processing as the training-period loader, but with a window that by
  default selects June 2020 through November 2021.

  Parameters
  ----------
  file : str
    Path of a CSV containing `start_date`, `end_date` and `region_id` columns
    plus the measurement column(s).
  start, end : str
    Inclusive bounds applied to `start_date` before aggregating.

  Returns
  -------
  pandas.DataFrame
    One row per region and calendar month, regions stacked in order of first
    appearance in the file. The index is the month-end date (named
    `start_date`); every remaining column, including `region_id`, holds the
    mean over that month. A month without observations inside a region's date
    span yields a row of NaN. An empty DataFrame is returned when no row falls
    inside the window.
  """
  df = pd.read_csv(file, parse_dates=['start_date', 'end_date'])
  df = df.drop(columns=['end_date'])

  in_window = (df['start_date'] >= start) & (df['start_date'] <= end)
  # The time of day is irrelevant for a monthly average.
  df = df.loc[in_window].assign(
    start_date=lambda frame: pd.to_datetime(frame['start_date'].dt.date)
  )

  # Aggregate each region separately so months are binned within that region's own date span.
  monthly_frames = []
  for region in df['region_id'].unique():
    region_rows = df[df['region_id'] == region]
    # An offset object instead of the 'M' alias keeps this working across pandas versions.
    month_end = pd.Grouper(key='start_date', freq=pd.offsets.MonthEnd())
    monthly_frames.append(region_rows.groupby(month_end).mean())

  # pd.concat replaces DataFrame.append (removed in pandas 2.0); it rejects an empty list.
  if not monthly_frames:
    return pd.DataFrame()
  return pd.concat(monthly_frames)

In [None]:
# Monthly per-region tables for the prediction period.
# These reuse the names of the training tables and therefore replace them.
df_precip, df_soil, df_temp, df_ndvi = (
  preprocess_test(csv_name)
  for csv_name in (
    'Daily Precipitation.csv',
    'Daily Soil Mositure.csv',
    'Daily Temperature.csv',
    'Eight Day NDVI.csv',
  )
)

In [None]:
#No null values are present and equal number of values are present across all dataframes

In [None]:
# Load the prediction template, index it by the (time-stripped) start date,
# drop both raw date columns, and inspect what is left.
df_prediction = (
  pd.read_csv('predicted_production_qty.csv', parse_dates = ['start_date','end_date'])
  .pipe(lambda frame: frame.set_index(pd.to_datetime(frame['start_date'].dt.date)))
  .drop(columns=['start_date', 'end_date'])
)
df_prediction.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 120 entries, 2021-01-01 to 2021-12-01
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   prod       0 non-null      float64
 1   region_id  120 non-null    int64  
dtypes: float64(1), int64(1)
memory usage: 2.8 KB


In [None]:
# prod is the column to be predicted (it has no non-null values in the template), so remove it from the features.
df_prediction = df_prediction.drop(columns=['prod'],axis=1)

In [None]:
#Calculating average values over 6 months similar to the training phase
def _six_month_means(monthly, column):
  """Return, region by region, the mean of `column` over each run of six consecutive monthly rows.

  Window k averages rows k .. k+5 of a region; a region with n rows contributes n-6 values.
  Regions are visited in order of first appearance in `monthly`.
  """
  means = []
  for region in monthly.region_id.unique():
    region_rows = monthly.loc[monthly['region_id'] == region]
    means += [
      region_rows.iloc[first:first + 6][column].mean()
      for first in range(region_rows.shape[0] - 6)
    ]
  return means

precip = _six_month_means(df_precip, 'precip')
temperature = _six_month_means(df_temp, 'temp')
smos = _six_month_means(df_soil, 'smos')
ndvi = _six_month_means(df_ndvi, 'ndvi')

In [None]:
# Attach the six-month means as feature columns. The lists are assigned by position,
# so this assumes df_prediction rows are grouped by region and sorted by date in the
# same region order as the feature tables — TODO confirm.
for column, values in (('precip', precip), ('temp', temperature), ('smos', smos), ('ndvi', ndvi)):
  df_prediction[column] = values

In [None]:
# Scale the prediction features with the TRAINING minima and maxima (taken from the
# unscaled feature columns of df_prod). The model was fitted on features transformed
# with those constants; rescaling with the prediction set's own min/max would feed it
# values on a different scale. Scaled values may therefore fall slightly outside [0, 1].
df_min_max_scaled = df_prediction.copy()
for column in ['precip','temp','smos','ndvi']:
  train_min = df_prod[column].min()
  train_max = df_prod[column].max()
  df_min_max_scaled[column] = (df_min_max_scaled[column] - train_min) / (train_max - train_min)

In [None]:
# Preview the scaled prediction features.
df_min_max_scaled.head()

Unnamed: 0_level_0,region_id,precip,temp,smos,ndvi
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-01-01,93,0.246991,0.84079,0.934698,0.668766
2021-02-01,93,0.198411,0.679072,0.957111,0.628453
2021-03-01,93,0.415509,0.591427,1.0,0.558868
2021-04-01,93,0.321233,0.560153,0.99615,0.460129
2021-05-01,93,0.314734,0.587303,0.966738,0.394831


In [None]:
# Work on a copy so the scaled table above stays unchanged.
pred_df = df_min_max_scaled.copy()

In [None]:
# Do NOT shuffle the prediction rows: the predictions are later written back into
# 'predicted_production_qty.csv' by position, so pred_df must keep the file's row
# order for every prediction to land on its own region and month.
pred_df.head()

Unnamed: 0_level_0,region_id,precip,temp,smos,ndvi
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-03-01,98,0.470483,0.334428,0.288393,0.722693
2021-01-01,103,0.891464,0.244905,0.222984,0.904067
2021-01-01,98,0.279613,0.538138,0.129708,0.801969
2021-09-01,97,0.038377,0.73517,0.603589,0.615615
2021-06-01,93,0.422158,0.5614,0.997427,0.406201


In [None]:
# Store region_id as a categorical so it can be one-hot encoded, as in training.
pred_df['region_id'] = pred_df.region_id.astype('category')

In [None]:
# Re-display after the dtype change; the shown values are unchanged.
pred_df.head()

Unnamed: 0_level_0,region_id,precip,temp,smos,ndvi
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-03-01,98,0.470483,0.334428,0.288393,0.722693
2021-01-01,103,0.891464,0.244905,0.222984,0.904067
2021-01-01,98,0.279613,0.538138,0.129708,0.801969
2021-09-01,97,0.038377,0.73517,0.603589,0.615615
2021-06-01,93,0.422158,0.5614,0.997427,0.406201


In [None]:
# One-hot encode region_id (first level dropped) and append the dummies to the features.
# NOTE(review): get_dummies is applied to the whole frame, so `regions` also carries
# precip/temp/smos/ndvi; after the concat those four columns appear twice (visible in the
# output below). This mirrors how the training features were built, and the model expects
# exactly that column layout, so both places have to be changed together.
regions = pd.get_dummies(pred_df,drop_first=True)
pred_df = pred_df.drop('region_id',axis=1)
pred_df = pd.concat([pred_df,regions],axis=1)
pred_df

Unnamed: 0_level_0,precip,temp,smos,ndvi,precip,temp,smos,ndvi,region_id_94,region_id_95,region_id_97,region_id_98,region_id_99,region_id_102,region_id_103,region_id_104,region_id_105
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2021-03-01,0.470483,0.334428,0.288393,0.722693,0.470483,0.334428,0.288393,0.722693,0,0,0,1,0,0,0,0,0
2021-01-01,0.891464,0.244905,0.222984,0.904067,0.891464,0.244905,0.222984,0.904067,0,0,0,0,0,0,1,0,0
2021-01-01,0.279613,0.538138,0.129708,0.801969,0.279613,0.538138,0.129708,0.801969,0,0,0,1,0,0,0,0,0
2021-09-01,0.038377,0.735170,0.603589,0.615615,0.038377,0.735170,0.603589,0.615615,0,0,1,0,0,0,0,0,0
2021-06-01,0.422158,0.561400,0.997427,0.406201,0.422158,0.561400,0.997427,0.406201,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-11-01,0.341216,0.751485,0.714316,0.197838,0.341216,0.751485,0.714316,0.197838,0,0,0,0,0,0,0,1,0
2021-11-01,0.295806,0.410385,0.066713,0.811947,0.295806,0.410385,0.066713,0.811947,0,0,0,1,0,0,0,0,0
2021-04-01,0.754515,0.098061,0.450320,0.809709,0.754515,0.098061,0.450320,0.809709,0,0,0,0,0,0,1,0,0
2021-06-01,0.344461,0.260795,0.586853,0.254555,0.344461,0.260795,0.586853,0.254555,0,1,0,0,0,0,0,0,0


In [None]:
# Predict production for every row of pred_df; results come back in pred_df's row order.
final_answers = LR.predict(pred_df)

In [None]:
# Reload the template and fill in the predictions.
# NOTE(review): the assignment is positional — it is only correct when the rows passed to
# LR.predict were in the same order as the rows of this CSV (any shuffling of pred_df breaks the pairing).
final_df = pd.read_csv('predicted_production_qty.csv')
final_df['prod'] = final_answers

In [None]:
# Confirm every row now has a predicted prod value.
final_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   start_date  120 non-null    object 
 1   end_date    120 non-null    object 
 2   prod        120 non-null    float64
 3   region_id   120 non-null    int64  
dtypes: float64(1), int64(1), object(2)
memory usage: 3.9+ KB


In [None]:
# Write the filled-in template back. index=False keeps the file's original columns:
# without it the RangeIndex is saved as an extra unnamed column, and because this file
# is also the notebook's input, the next run would read that column back in.
final_df.to_csv('predicted_production_qty.csv', index=False)