In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
#Walk the Kaggle input directory and print the full path of every file found
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# **Loading the data**

In [None]:
#Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
#Path of the irrigation-scheduling CSV file inside the Kaggle input directory
url = '/kaggle/input/irrigation-scheduling/Irrigation Scheduling.csv'

In [None]:
#Load the CSV file at `url` into a pandas DataFrame
df=pd.read_csv(url)
#Bare expression: the notebook renders the DataFrame as this cell's output
df

In [None]:
#Preview the first five records (head() returns 5 rows by default)
df.head()

In [None]:
#Preview the last five records (tail() returns 5 rows by default)
df.tail()

In [None]:
#Show the 'class' column on its own (all rows, selected by label)
df.loc[:, 'class']

# **Describing the data**

In [None]:
#describe() returns summary statistics for the DataFrame's columns
#(by default only the numeric columns are summarized).
df.describe()

In [None]:
#shape is a (number_of_rows, number_of_columns) tuple
df.shape

In [None]:
#info() prints the index range, each column's label, non-null count and dtype,
#and the DataFrame's memory usage.
df.info()

In [None]:
#Confirm the Python type of the loaded object (a pandas DataFrame, as returned by pd.read_csv)
type(df)

In [None]:
#dtypes returns a Series giving the data type of each column
df.dtypes

In [None]:
#nunique() returns the number of distinct values in each column
#(missing values are not counted by default).
df.nunique()

In [None]:
#count() returns the number of non-null values in each column (not in each row;
#per-row counts would need axis=1).
df.count()

# **Data Wrangling**

In [None]:
#Drop the "id" column, which is not needed for the analysis.
#Reassigning the result (instead of inplace=True) together with errors="ignore"
#keeps this cell idempotent: re-running it after "id" is already gone no longer
#raises a KeyError.
df = df.drop(columns="id", errors="ignore")

In [None]:
#Preview the first rows to check that the "id" column is no longer present
df.head()

In [None]:
#isna() returns a boolean DataFrame of the same shape: True where a value is
#missing (NaN/None), False otherwise. It does not count anything by itself.
df.isna()

In [None]:
#Summing the boolean mask column-wise gives the number of missing values per column
df.isna().sum()

In [None]:
#Fill the missing 'altitude' values with the column mean truncated to an integer.
#The result is assigned back to the column instead of calling fillna(inplace=True)
#on the selected Series: in-place modification through df[col] is chained
#assignment, which emits a FutureWarning in pandas 2.x and leaves df unchanged
#once Copy-on-Write is enabled (the default from pandas 3.0).
altitude_mean = int(df['altitude'].mean())
df['altitude'] = df['altitude'].fillna(altitude_mean)

In [None]:
#Count the missing values left in the 'altitude' column after the fill step
df['altitude'].isna().sum()

In [None]:
#Number of rows belonging to each class, sorted by frequency (most common first)
df['class'].value_counts()

In [None]:
#Same per-class row counts via groupby; here the result is ordered by class label
df.groupby('class').size()

# **Exploratory Data Analysis**

In [None]:
#Plot the distribution of the 'class' column with matplotlib,
#drawing on an explicitly created Axes instead of the implicit current one
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.hist(df['class'])
plt.show()

In [None]:
#Number of rows in each class, ordered by class label
df.groupby('class').size()

In [None]:
#Per-class row counts as a plain list, computed from the data instead of being
#typed in by hand so they cannot drift from the dataset.
#groupby returns the classes in sorted label order.
List=df.groupby('class').size().tolist()

In [None]:
#Visualizing the per-class counts held in `List` as a pie chart
import matplotlib.pyplot as plt
#Take the slice labels from the data (sorted class-label order, the order in which
#df.groupby('class').size() reports its counts) rather than hard-coding them, so
#each slice carries the class name exactly as it appears in the dataset.
class_labels = df.groupby('class').size().index.tolist()
plt.pie(List,labels=class_labels)
plt.title("Share of records per class")
plt.show()

In [None]:
#Same class distribution, drawn with seaborn
import seaborn as sns
#countplot draws one bar per category whose height is the number of rows in that
#category, which makes it a natural fit for a categorical column such as 'class'.
sns.countplot(data=df, x='class')

# **Encoding categorical values**

In [None]:
#Display the values of the 'class' column as a numpy array.
#The column is selected by name rather than by position 6, so this keeps showing
#the class column even if columns are added, dropped or reordered upstream.
df['class'].values

In [None]:
#Encode the string class labels as integer codes 0..n_classes-1.
#A classification target must be a single 1-D column. OneHotEncoder produces one
#column per class (an n_samples x n_classes matrix), which cannot be stored in the
#single 'class' column, so LabelEncoder is used for the target instead.
from sklearn.preprocessing import LabelEncoder
label_encoder=LabelEncoder()
df['class']=label_encoder.fit_transform(df['class'])
#label_encoder.classes_ maps each integer code back to its original label
df['class'].values

In [None]:
#Check the data type of the 'class' column after the encoding step
df['class'].dtype

In [None]:
#Preview the first five rows of the encoded data (head() defaults to 5 rows)
df.head()

# **Correlation of columns**

In [None]:
#Finding the correlation
#corr() computes the pairwise correlation (Pearson by default) between columns;
#it measures linear association, not whether one column causes changes in another.
#Only the columns at positions 0-6 are included.
data=df.iloc[:,0:7].corr()
data

In [None]:
#Visualize the correlation matrix `data` as a heatmap.
#annot=True writes each value inside its cell; fmt='.0%' formats those values
#as whole-number percentages.
sns.heatmap(data,annot=True,fmt='.0%')

# **Machine Learning**

In [None]:
#Dividing data into X and Y (converted to numpy arrays)
#Features: the first four columns, selected by position.
#NOTE(review): the columns at positions 4 and 5 are left out of X - confirm this is intentional.
X=df.iloc[:,0:4].values  #Independent dataset
#Target: select 'class' by name rather than by position 6, so the target stays
#correct even if columns are added, dropped or reordered upstream.
Y=df['class'].values    #Dependent dataset

In [None]:
#Show the feature matrix (print gives numpy's plain-text representation)
print(X)

In [None]:
#Show the target vector (print gives numpy's plain-text representation)
print(Y)

In [None]:
#Hold out 20% of the rows for testing and train on the remaining 80%;
#random_state=0 fixes the shuffle so the split is reproducible
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.20,
    random_state=0,
)

In [None]:
#Standardize the features. The scaler's mean and standard deviation are learned
#from the training set only and then applied to both sets, so no information
#from the test set leaks into the scaling.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
#Inspect the standardized training features
X_train

In [None]:
#Inspect the test features after applying the scaler fitted on the training set
X_test

In [None]:
#Inspect the training targets
Y_train

In [None]:
#Inspect the test targets
Y_test

**Model 1: Logistic Regression**

In [None]:
#Train a logistic-regression classifier (default hyper-parameters) on the training split
from sklearn.linear_model import LogisticRegression
model1 = LogisticRegression()
model1.fit(X=X_train, y=Y_train)

In [None]:
#Predict the class of every test sample with the fitted logistic-regression model
pred1=model1.predict(X_test)

In [None]:
#Accuracy on the test split: fraction of samples whose predicted label equals the true label
from sklearn.metrics import accuracy_score
accuracy_score(y_true=Y_test, y_pred=pred1)

In [None]:
#Confusion matrix for model 1: rows are the true classes, columns the predicted classes
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_true=Y_test, y_pred=pred1))

In [None]:
#Per-class precision, recall, F1-score and support for model 1
from sklearn.metrics import classification_report
print(classification_report(y_true=Y_test, y_pred=pred1))

**Model 2: Gaussian Naive Bayes Classifier**

In [None]:
#Train a Gaussian Naive Bayes classifier (default hyper-parameters) on the training split
from sklearn.naive_bayes import GaussianNB
model2 = GaussianNB()
model2.fit(X=X_train, y=Y_train)

In [None]:
#Predict the class of every test sample with the fitted Gaussian Naive Bayes model
pred2=model2.predict(X_test)

In [None]:
#Accuracy on the test split: fraction of samples whose predicted label equals the true label
from sklearn.metrics import accuracy_score
accuracy_score(y_true=Y_test, y_pred=pred2)

In [None]:
#Confusion matrix for model 2: rows are the true classes, columns the predicted classes
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_true=Y_test, y_pred=pred2))

In [None]:
#Per-class precision, recall, F1-score and support for model 2
from sklearn.metrics import classification_report
print(classification_report(y_true=Y_test, y_pred=pred2))

**Model 3: Support Vector Classifier**

In [None]:
#Train a support vector classifier with a linear kernel on the training split
from sklearn.svm import SVC
model3 = SVC(kernel='linear')
model3.fit(X=X_train, y=Y_train)

In [None]:
#Predict the class of every test sample with the fitted support vector classifier
pred3=model3.predict(X_test)

In [None]:
#Accuracy on the test split: fraction of samples whose predicted label equals the true label
from sklearn.metrics import accuracy_score
print(accuracy_score(y_true=Y_test, y_pred=pred3))

In [None]:
#Confusion matrix for model 3: rows are the true classes, columns the predicted classes
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_true=Y_test, y_pred=pred3))

In [None]:
#Per-class precision, recall, F1-score and support for model 3
from sklearn.metrics import classification_report
print(classification_report(y_true=Y_test, y_pred=pred3))