In [6]:
import numpy as np
import pandas as pd
import mlflow
import logging
import warnings
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
import io

In [7]:
# Configure the root logger once for the notebook: only warnings and above are shown.
logging.basicConfig(level=logging.WARNING)

# Module-level logger used by the cells below to report problems.
logger = logging.getLogger(__name__)
logger



In [8]:
if __name__ == "__main__":
    # Train a linear-regression baseline on the crop-production CSV and let
    # MLflow autologging record the run (parameters, metrics, model artifact).

    warnings.filterwarnings("ignore")
    np.random.seed(40)
    mlflow.autolog()
    # Set our tracking server uri for logging
    mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

    # Create a new MLflow Experiment
    mlflow.set_experiment("My First MLOps Project")

    file = "./data/crop_production.csv"
    try:
        df = pd.read_csv(file, delimiter=',')
    except Exception:
        # Log the real cause and stop here. Swallowing the error would only
        # postpone the failure to a confusing NameError on `df` below.
        logger.exception("Unable to read the training CSV at %s", file)
        raise

    print(df)
    print(df.describe())  # max, min, count, mean, standard deviation etc.

    print(df.shape)
    print(df.isnull().sum())

    # Replace missing values with 0, then remove exact duplicate rows.
    # drop_duplicates() returns a new frame, so the result must be re-assigned.
    df = df.fillna(0)
    df = df.drop_duplicates()

    sum_maxp = df["Production"].sum()
    print(sum_maxp)
    # Share of total production contributed by each row, in percent.
    # NOTE(review): this feature is computed directly from the target column
    # ("Production"), so the model is effectively given the answer. It is kept
    # because the prediction UI asks for it, but the reported r2 is optimistic.
    df["percent_of_production"] = (df["Production"] / sum_maxp) * 100

    # Drop identifier-like columns and one-hot encode the remaining categoricals.
    data = df.drop(["State_Name", "District_Name", "Crop_Year"], axis=1)
    data_dum = pd.get_dummies(data, dtype='int')

    X = data_dum.drop("Production", axis=1).values
    y = data_dum["Production"].values

    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        shuffle=True,
        test_size=0.25,
        random_state=1,
    )

    print('Shape of training feature:', X_train.shape)
    print('Shape of testing feature:', X_test.shape)
    print('Shape of training label:', y_train.shape)
    print('Shape of testing label:', y_test.shape)

    with mlflow.start_run():
        model = LinearRegression()
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        print(f"The r2 score for this model is : {r2_score(y_test,preds)}")
        print(model.coef_)
        print(model.intercept_)
        

2024/06/14 16:56:18 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


                         State_Name District_Name  Crop_Year       Season  \
0       Andaman and Nicobar Islands      NICOBARS       2000  Kharif        
1       Andaman and Nicobar Islands      NICOBARS       2000  Kharif        
2       Andaman and Nicobar Islands      NICOBARS       2000  Kharif        
3       Andaman and Nicobar Islands      NICOBARS       2000  Whole Year    
4       Andaman and Nicobar Islands      NICOBARS       2000  Whole Year    
...                             ...           ...        ...          ...   
246086                  West Bengal       PURULIA       2014  Summer        
246087                  West Bengal       PURULIA       2014  Summer        
246088                  West Bengal       PURULIA       2014  Whole Year    
246089                  West Bengal       PURULIA       2014  Winter        
246090                  West Bengal       PURULIA       2014  Winter        

                       Crop      Area  Production  
0                  Arec

In [9]:
# Distinct season and crop labels in sorted order.
# NOTE(review): this order is assumed to match the order of the one-hot columns
# produced by pd.get_dummies for the same data — confirm against data_dum.columns.
seasons = sorted(set(df["Season"]))
print(seasons)
crops = sorted(set(df["Crop"]))
print(crops)

# Names of the feature columns (every column except the target), in the same
# order used to build X. The previous positional slice `columns[4:]` was taken
# from a frame that still contained "Production", so it dropped the first season
# dummy and came out shorter than the model's input width.
cols = list(data_dum.drop("Production", axis=1).columns)



['Autumn     ', 'Kharif     ', 'Rabi       ', 'Summer     ', 'Whole Year ', 'Winter     ']
['Apple', 'Arcanut (Processed)', 'Arecanut', 'Arhar/Tur', 'Ash Gourd', 'Atcanut (Raw)', 'Bajra', 'Banana', 'Barley', 'Bean', 'Beans & Mutter(Vegetable)', 'Beet Root', 'Ber', 'Bhindi', 'Bitter Gourd', 'Black pepper', 'Blackgram', 'Bottle Gourd', 'Brinjal', 'Cabbage', 'Cardamom', 'Carrot', 'Cashewnut', 'Cashewnut Processed', 'Cashewnut Raw', 'Castor seed', 'Cauliflower', 'Citrus Fruit', 'Coconut ', 'Coffee', 'Colocosia', 'Cond-spcs other', 'Coriander', 'Cotton(lint)', 'Cowpea(Lobia)', 'Cucumber', 'Drum Stick', 'Dry chillies', 'Dry ginger', 'Garlic', 'Ginger', 'Gram', 'Grapes', 'Groundnut', 'Guar seed', 'Horse-gram', 'Jack Fruit', 'Jobster', 'Jowar', 'Jute', 'Jute & mesta', 'Kapas', 'Khesari', 'Korra', 'Lab-Lab', 'Lemon', 'Lentil', 'Linseed', 'Litchi', 'Maize', 'Mango', 'Masoor', 'Mesta', 'Moong(Green Gram)', 'Moth', 'Niger seed', 'Oilseeds total', 'Onion', 'Orange', 'Other  Rabi pulses', 'Other Cer

In [19]:
import gradio as gr

def greet(area, percent, season, crop):
    """Predict crop production for one set of inputs from the UI.

    Builds a single feature row laid out as
    ``[area, percent, <one-hot season>, <one-hot crop>]`` where the one-hot
    sections follow the order of the module-level ``seasons`` and ``crops``
    lists, then passes it to the module-level ``model``.

    Args:
        area: Cultivated area (numeric).
        percent: Percent-of-production feature (numeric).
        season: One of the labels in ``seasons``; any other value (including
            ``None``) leaves the season section all zeros.
        crop: One of the labels in ``crops``; any other value (including
            ``None``) leaves the crop section all zeros.

    Returns:
        The first element of ``model.predict`` for the constructed row.

    Raises:
        ValueError: If ``area`` or ``percent`` is missing.
    """
    if area is None or percent is None:
        # An empty number box arrives as None, which would become NaN in the
        # feature row and fail inside the model with a less helpful message.
        raise ValueError("Both area and percent of production must be provided.")

    season_offset = 2
    crop_offset = season_offset + len(seasons)
    # Width is derived from the category lists so every crop has a slot;
    # a fixed width plus an unrelated loop bound left the last crops unencoded.
    features = np.zeros(crop_offset + len(crops), dtype=np.float64)
    features[0] = area
    features[1] = percent

    if season in seasons:
        features[season_offset + seasons.index(season)] = 1.0
    if crop in crops:
        features[crop_offset + crops.index(crop)] = 1.0

    # The model expects a 2-D array with one row per sample.
    preds = model.predict(features.reshape(1, -1))
    return preds[0]
    
# Web UI around `greet`: the input widgets are listed in the same positional
# order as the function's parameters (area, percent, season, crop).
demo = gr.Interface(
    fn=greet,
    inputs=[gr.Number(), gr.Number(), gr.Radio(seasons), gr.Dropdown(crops)],
    outputs=[gr.Textbox(label="Model Prediction")],
    title="Crop Production Prediction",
    description=(
        "This application has a machine learning model in the backend which predicts the production this year using the area, percent of production, season and the crop."
    ),
)

# Start the local Gradio server for this interface.
demo.launch()



Running on local URL:  http://127.0.0.1:7868

To create a public link, set `share=True` in `launch()`.


