In [1]:
import numpy as np
import pandas as pd
# Show every column when a wide frame is rendered. Uses the documented
# top-level pd.set_option instead of going through the incidental
# pd.pandas self-reference.
pd.set_option('display.max_columns', None)

import seaborn as sns
# Apply seaborn's styling with font elements scaled by 1.2.
sns.set(font_scale=1.2)

import matplotlib.pyplot as plt
# Default (width, height) in inches for figures created after this point.
plt.rcParams['figure.figsize'] = (12,8)
%matplotlib inline

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)


In [2]:
# Read the preprocessed 2010-2021 historical weather CSV (path is relative to
# the notebook's working directory) and preview its first two rows.
df = pd.read_csv(
    filepath_or_buffer=r'data/Historical Weather Data 2010-2021_preprocessed_1.csv',
)
display(df.head(n=2))

Unnamed: 0,observation,date,month,year,tempC_7to8,tempC_1to2,tempC_6to7,tempC_avg(0C),Relative humidity_7to8,Relative humidity_1to2,Relative humidity_6to7,Relative humidity_avg(%),windspeedKmph_7to8,windspeedKmph_1to2,windspeedKmph_6to7,windspeedKmph_avg(Km/h),pressureMB_7to8,pressureMB_1to2,pressureMB_6to7,pressureMB_avg,precipMM_7to8,precipMM_1to2,precipMM_6to7,precipMM_avg(mm),weatherDesc_7to8,weatherDesc_1to2,weatherDesc_6to7,weatherDesc,Sunshine Hours,%_soil_moisure,soil_pH,water_pH,water_TDS_mgpl,Label (Disease Yes/No),Type of Disease (Bacterial Blight/Telya),Anthracnose,Fruit Spot/ Rot,Fusarium Wilt,Fruit Borer / Blight Blora
0,2010-01-01,1,1,2010,20,30,20,23,42,33,59,44,9,2,4,5,1015,1012,1013,1013,0.0,0.0,0.0,0.0,5,5,5,5,9.8,45,6.91,7.18,1709.0,0,0,0,0,0,0
1,2010-01-02,2,1,2010,23,29,23,25,49,40,62,50,9,3,3,5,1015,1013,1015,1014,0.0,0.0,0.0,0.0,5,4,4,4,9.8,45,6.77,7.66,1707.0,1,1,1,0,0,0


In [3]:
# Inspect the column labels of the loaded frame.
df.columns

Index(['observation', 'date', 'month', 'year', 'tempC_7to8', 'tempC_1to2',
       'tempC_6to7', 'tempC_avg(0C)', 'Relative humidity_7to8',
       'Relative humidity_1to2', 'Relative humidity_6to7',
       'Relative humidity_avg(%)', 'windspeedKmph_7to8', 'windspeedKmph_1to2',
       'windspeedKmph_6to7', 'windspeedKmph_avg(Km/h)', 'pressureMB_7to8',
       'pressureMB_1to2', 'pressureMB_6to7', 'pressureMB_avg', 'precipMM_7to8',
       'precipMM_1to2', 'precipMM_6to7', 'precipMM_avg(mm)',
       'weatherDesc_7to8', 'weatherDesc_1to2', 'weatherDesc_6to7',
       'weatherDesc', 'Sunshine Hours', '%_soil_moisure', 'soil_pH',
       'water_pH', 'water_TDS_mgpl', 'Label (Disease Yes/No)',
       'Type of Disease (Bacterial Blight/Telya)', 'Anthracnose',
       'Fruit Spot/ Rot', 'Fusarium Wilt', 'Fruit Borer / Blight Blora'],
      dtype='object')

In [4]:
# Narrow the frame to eight weather/soil columns followed by the five disease
# columns. Column order matters: a later cell separates inputs from targets
# positionally with iloc[:, :-5] / iloc[:, -5:].
df_with_correlation = df[[
    # inputs
    'tempC_avg(0C)',
    'Relative humidity_avg(%)',
    'windspeedKmph_avg(Km/h)',
    'pressureMB_avg',
    'precipMM_avg(mm)',
    'weatherDesc',
    'Sunshine Hours',
    '%_soil_moisure',
    # disease columns (must stay last)
    'Type of Disease (Bacterial Blight/Telya)',
    'Anthracnose',
    'Fruit Spot/ Rot',
    'Fusarium Wilt',
    'Fruit Borer / Blight Blora',
]]
display(df_with_correlation.head(n=2))

Unnamed: 0,tempC_avg(0C),Relative humidity_avg(%),windspeedKmph_avg(Km/h),pressureMB_avg,precipMM_avg(mm),weatherDesc,Sunshine Hours,%_soil_moisure,Type of Disease (Bacterial Blight/Telya),Anthracnose,Fruit Spot/ Rot,Fusarium Wilt,Fruit Borer / Blight Blora
0,23,44,5,1013,0.0,5,9.8,45,0,0,0,0,0
1,25,50,5,1014,0.0,4,9.8,45,1,1,0,0,0


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import confusion_matrix
from sklearn import metrics
import pickle

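- [Multi-label classification with scikit-multilearn (Section.io)](https://www.section.io/engineering-education/multi-label-classification-with-scikit-multilearn/)
- [Introduction to multi-label classification (Analytics Vidhya)](https://www.analyticsvidhya.com/blog/2017/08/introduction-to-multi-label-classification/)
- [scikit-multilearn home page](http://scikit.ml/)
- [scikit-multilearn: model selection](http://scikit.ml/modelselection.html)
- [scikit-multilearn API: Label Powerset problem transformation](http://scikit.ml/api/skmultilearn.problem_transform.lp.html)
- [Multi-label classification with sklearn (Kaggle notebook)](https://www.kaggle.com/code/roccoli/multi-label-classification-with-sklearn/notebook)
- [Multi-label text classification with scikit-learn (Towards Data Science)](https://towardsdatascience.com/multi-label-text-classification-with-scikit-learn-30714b7819c5)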

In [7]:
# Preview the last five columns, i.e. the disease columns that are used as Y
# in the train/test split (the remaining leading columns, iloc[:,:-5], form X).
df_with_correlation.iloc[:,-5:]

Unnamed: 0,Type of Disease (Bacterial Blight/Telya),Anthracnose,Fruit Spot/ Rot,Fusarium Wilt,Fruit Borer / Blight Blora
0,0,0,0,0,0
1,1,1,0,0,0
2,0,0,0,0,0
3,0,0,0,0,0
4,0,0,0,0,0
...,...,...,...,...,...
4222,1,0,0,0,0
4223,1,0,0,0,0
4224,0,0,0,0,0
4225,0,0,0,0,0


In [9]:
# Separate inputs from targets by column NAME rather than by position, so the
# boundary stays correct (or fails loudly with a KeyError) if the column
# selection that builds df_with_correlation is ever reordered or extended.
label_columns = [
    'Type of Disease (Bacterial Blight/Telya)',
    'Anthracnose',
    'Fruit Spot/ Rot',
    'Fusarium Wilt',
    'Fruit Borer / Blight Blora',
]
X = df_with_correlation.drop(columns=label_columns)
Y = df_with_correlation[label_columns]

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
# NOTE(review): `stratify = Y` was left disabled in the original. With a
# multi-column Y, scikit-learn appears to stratify on whole label combinations
# and to raise if a combination occurs only once - confirm before enabling.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, random_state=42
)

https://www.google.com/search?q=multi+label+classifier+sklearn
    

https://scikit-learn.org/stable/modules/multiclass.html

https://scikit-learn.org/stable/modules/generated/sklearn.multioutput.MultiOutputClassifier.html#sklearn.multioutput.MultiOutputClassifier

https://scikit-learn.org/stable/modules/generated/sklearn.multioutput.MultiOutputClassifier.html#sklearn.multioutput.MultiOutputClassifier

https://scikit-learn.org/stable/modules/multiclass.html#multiclass-multioutput-classification

https://scikit-learn.org/0.15/modules/multiclass.html

https://scikit-learn.org/stable/auto_examples/miscellaneous/plot_multilabel.html

https://towardsdatascience.com/multi-label-text-classification-with-scikit-learn-30714b7819c5

https://github.com/susanli2016/Machine-Learning-with-Python/blob/master/Multi%20label%20text%20classification.ipynb

https://www.google.com/search?q=OneVsRestClassifier

https://scikit-learn.org/stable/modules/generated/sklearn.multiclass.OneVsRestClassifier.html

https://stackoverflow.com/questions/42819460/what-is-the-difference-between-onevsrestclassifier-and-multioutputclassifier-in

https://machinelearningmastery.com/one-vs-rest-and-one-vs-one-for-multi-class-classification/

https://www.programcreek.com/python/example/94869/sklearn.multiclass.OneVsRestClassifier

https://www.geeksforgeeks.org/one-vs-rest-strategy-for-multi-class-classification/

https://docs.w3cub.com/scikit_learn/modules/generated/sklearn.multiclass.onevsrestclassifier

https://www.kaggle.com/code/venkatkrishnan/nlp-multiclass-onevsrestclassifier/notebook

https://github.com/christianversloot/machine-learning-articles/blob/main/creating-one-vs-rest-and-one-vs-one-svm-classifiers-with-scikit-learn.md

https://www.analyticsvidhya.com/blog/2021/09/onevsrest-classifier-for-predicting-multiple-tags-of-research-articles/

https://docs.microsoft.com/en-us/python/api/nimbusml/nimbusml.multiclass.onevsrestclassifier?view=nimbusml-py-latest

https://programtalk.com/python-examples/sklearn.multiclass.OneVsRestClassifier/

