In [9]:
import numpy as np
import pandas as pd
import mlflow
import logging
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
import requests
import threading
from flask import Flask, request
import gradio as gr

In [10]:
# Root logging threshold: only warnings and more severe records pass.
logging.basicConfig(level=logging.WARNING)

# Logger named after this module, for the notebook's own messages.
logger = logging.getLogger(__name__)
logger



In [11]:
import dagshub

# Initialise the DagsHub integration for this repository with MLflow enabled.
dagshub.init(
    repo_owner='kriti.srivastava002',
    repo_name='Crop-Production',
    mlflow=True,
)

# Switch on MLflow autologging (the recorded output reports it enabled for sklearn).
mlflow.autolog()

2024/06/21 21:58:31 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


In [14]:
# Read the crop-production CSV from the local data folder into a DataFrame.
file = "./data/crop_production.csv"
df = pd.read_csv(file, sep=',')

# Print the frame itself, followed by its summary statistics.
for report in (df, df.describe()):
    print(report)

FileNotFoundError: [Errno 2] No such file or directory: './data/crop_production.csv'

In [None]:
# Inspect the raw frame before cleaning: dimensions and missing values per column.
print(df.shape)
print(df.isnull().sum())

# Replace missing values with 0 and remove exact duplicate rows.
# drop_duplicates() returns a new frame rather than modifying `df`, so the result
# has to be assigned back; the bare call previously left the duplicates in place.
df = df.fillna(0).drop_duplicates()

# Show the size after cleaning so the effect of de-duplication is visible.
print(df.shape)

In [None]:
# Summed production for every state, ordered from lowest to highest total.
temp = (
    df.groupby('State_Name')['Production']
    .sum()
    .reset_index()
    .sort_values('Production')
)
temp.plot.bar(x='State_Name', y='Production')
plt.show()

In [None]:
# Total production per crop year.
emp = df.groupby(by='Crop_Year')['Production'].sum().reset_index()
emp.plot(kind='bar', x="Crop_Year", y="Production", title="Total production per crop year")
# plt.show() renders the figure; a bare plt.plot() draws nothing and only echoes
# an empty list as the cell output.
plt.show()

In [None]:
# Top 10 districts by mean production per record (ascending sort, so the
# largest values are the last ten rows).
temp = df.groupby(by='District_Name')['Production'].mean().reset_index().sort_values(by='Production')
temp.tail(10).plot(kind='bar', x='District_Name', y='Production',
                   title='Top 10 districts by mean production')
# plt.show() renders the figure; a bare plt.plot() draws nothing and only echoes
# an empty list as the cell output.
plt.show()

In [None]:
# The 10 districts with the lowest mean production per record (ascending sort,
# so they are the first ten rows).
temp = df.groupby(by='District_Name')['Production'].mean().reset_index().sort_values(by='Production')
temp.head(10).plot(kind='bar', x='District_Name', y='Production',
                   title='10 districts with the lowest mean production')
# plt.show() renders the figure; a bare plt.plot() draws nothing and only echoes
# an empty list as the cell output.
plt.show()

In [None]:
# Number of records per crop across the whole dataset.
temp = df['Crop'].value_counts()
temp.plot(kind='area', title='Number of records per crop')
# plt.show() renders the figure; a bare plt.plot() draws nothing and only echoes
# an empty list as the cell output.
plt.show()

In [None]:
# Top 10 crops by mean production per record (ascending sort, so the largest
# values are the last ten rows).
temp = df.groupby(by='Crop')['Production'].mean().reset_index().sort_values(by='Production')
temp.tail(10).plot(kind='bar', x='Crop', y='Production',
                   title='Top 10 crops by mean production')
# plt.show() renders the figure; a bare plt.plot() draws nothing and only echoes
# an empty list as the cell output.
plt.show()

In [None]:
# Mean production per crop year, with bars ordered by that mean (not by year).
# The doubled assignment (`temp= temp = ...`) was a typo and is collapsed to one.
temp = df.groupby(by='Crop_Year')['Production'].mean().reset_index().sort_values(by='Production')
temp.plot(kind='bar', x="Crop_Year", y="Production", title="Mean production per crop year")
# plt.show() renders the figure; a bare plt.plot() draws nothing and only echoes
# an empty list as the cell output.
plt.show()

In [None]:
# Number of records per growing season.
temp = df['Season'].value_counts()
temp.plot(kind='bar', title='Number of records per season')
# plt.show() renders the figure; a bare plt.plot() draws nothing and only echoes
# an empty list as the cell output.
plt.show()

In [None]:
# Average cultivated area for each crop year, drawn as a scatter plot.
temp = (
    df.groupby('Crop_Year')['Area']
    .mean()
    .reset_index()
)
temp.plot.scatter(x='Crop_Year', y='Area')
plt.show()

In [None]:
# Grand total of production over all records; used as the denominator below.
sum_maxp = df["Production"].sum()
print(sum_maxp)

# Each record's share of the grand total, in percent. Column arithmetic is
# vectorised, so no per-element lambda is needed; the operation order
# (divide, then multiply by 100) is unchanged.
# NOTE(review): this column is computed from the target `Production` and is kept
# as a model input further down, so the model effectively sees a rescaled copy of
# the target (target leakage) — confirm this is intended.
df["percent_of_production"] = df["Production"] / sum_maxp * 100


In [None]:
# Discard the location/year columns, then one-hot encode what is categorical.
data = df.drop(columns=["State_Name", "District_Name", "Crop_Year"])
data_dum = pd.get_dummies(data, dtype='int')

# Target vector and feature matrix as plain NumPy arrays.
y = data_dum["Production"].values
X = data_dum.drop(columns="Production").values

In [None]:
# Hold out 25% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    shuffle=True,
    test_size=0.25,
    random_state=1,
)

print('Shape of training feature:', X_train.shape)
print('Shape of testing feature:', X_test.shape)
print('Shape of training label:', y_train.shape)
# This line reports y_test, so it is labelled "testing" (it previously said "training").
print('Shape of testing label:', y_test.shape)



# Fit and evaluate a linear regression inside a single tracked MLflow run.
# `model` stays at notebook scope because the prediction endpoint uses it later.
with mlflow.start_run():
    model = LinearRegression()
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    # Score once and reuse the value for both the printout and the logged metric.
    r2 = r2_score(y_test, preds)
    print(f"The r2 score for this model is : {r2}")
    mlflow.log_metric('r2 score', r2)

    # Fitted coefficients are printed only; the intercept is also recorded on the run.
    print(model.coef_)
    print(model.intercept_)
    mlflow.log_param('Intercept', model.intercept_)

  


In [None]:
# Distinct season and crop labels from the data, each in sorted order.
seasons = sorted(set(df["Season"]))
print(seasons)
crops = sorted(set(df["Crop"]))
print(crops)

# Column labels of the encoded frame from position 4 onward.
cols = list(data_dum.columns)[4:]



In [None]:
# Flask application object; the prediction route is registered on it below.
app = Flask(import_name=__name__)
app

In [None]:
def _encode_features(area, percent, season, crop):
    """Build the single-row feature matrix passed to ``model.predict``.

    Layout: ``[area, percent, one-hot season..., one-hot crop...]``, where the
    one-hot sections follow the order of the sorted ``seasons`` and ``crops``
    lists. A season or crop that is not in those lists leaves its section all
    zeros.
    """
    n_seasons = len(seasons)
    # Width is derived from the category lists instead of a hard-coded 132.
    # NOTE(review): assumes the training matrix has exactly these columns in this
    # order (area, percent, seasons, crops) — confirm against `data_dum`.
    row = np.zeros(2 + n_seasons + len(crops), dtype=np.float64)
    row[0] = area
    row[1] = percent
    if season in seasons:
        row[2 + seasons.index(season)] = 1.0
    if crop in crops:
        # Every crop gets a slot; the previous loop was bounded by len(cols) and
        # stopped before the last few crop positions, so those crops were never set.
        row[2 + n_seasons + crops.index(crop)] = 1.0
    return row.reshape(1, -1)


@app.post('/predict')
def doPrediction():
    """Predict production for one JSON request.

    Expects a JSON object with the keys ``area``, ``percent``, ``season`` and
    ``crop``.

    Returns:
        ``({'predicted_value': <float>}, 200)`` on success, or
        ``({'error': <message>}, 400)`` when a required key is missing or
        ``area``/``percent`` is not numeric.
    """
    data = request.json
    try:
        features = _encode_features(
            float(data['area']),
            float(data['percent']),
            data['season'],
            data['crop'],
        )
    except (KeyError, TypeError, ValueError) as exc:
        return {'error': f'invalid request: {exc!r}'}, 400

    preds = model.predict(features)
    # Convert the NumPy scalar to a built-in float for the JSON response.
    return {'predicted_value': float(preds[0])}, 200
    
def run_flask():
    """Run the Flask app on the address the Gradio client posts to."""
    # Host and port are spelled out so they visibly match the URL used by the client.
    app.run(host="127.0.0.1", port=5000)

# Daemon thread: the server must not keep the interpreter alive once the
# notebook/kernel is shutting down.
flask_thread = threading.Thread(target=run_flask, daemon=True)
flask_thread.start()

In [None]:
def greet(area, percent, season, crop):
    """Send the inputs to the local prediction endpoint and return its prediction.

    Args:
        area: Value sent as ``area``.
        percent: Value sent as ``percent``.
        season: Value sent as ``season``.
        crop: Value sent as ``crop``.

    Returns:
        The ``predicted_value`` field of the endpoint's JSON response.

    Raises:
        requests.RequestException: If the request times out, cannot connect, or
            the endpoint answers with an HTTP error status.
    """
    payload = {'area': area, 'percent': percent, 'season': season, 'crop': crop}
    # A timeout keeps the UI from hanging forever if the server is not up.
    resp = requests.post("http://127.0.0.1:5000/predict", json=payload, timeout=10)
    # Surface HTTP errors directly instead of failing later on a missing key.
    resp.raise_for_status()
    return resp.json()['predicted_value']
    
# Input widgets, in the same order as greet's parameters: area, percent, season, crop.
prediction_inputs = [
    gr.Number(minimum=0, maximum=9e7, step=0.01),
    gr.Number(minimum=0, maximum=1, step=1e-15),
    gr.Radio(seasons),
    gr.Dropdown(crops),
]

# Gradio front end that forwards the four inputs to greet and shows the result as text.
demo = gr.Interface(
    fn=greet,
    inputs=prediction_inputs,
    outputs=[gr.Textbox(label="Model Prediction")],
    title="Crop Production Prediction",
    description="This application has a machine learning model in the backend which predicts the production this year using the area, percent of production, season and the crop.",
)
demo.launch()

