In [6]:
#Importing required libraries for the assignment code

import pandas as pd #for data-preprocessing
import numpy as np #for creating nd-arrays and matrix multiplications
from sklearn.model_selection import train_test_split #for splitting dataset into test and train data
from sklearn.linear_model import LogisticRegression #for using Logistic Regression classification and compare it's accuracy with bulid classifier
from sklearn import metrics #for accuracy calculations of 
from sklearn import preprocessing #for importing LabelEncoder

#Reading every line of the dataset inside a context manager so the file handle
#is closed as soon as the data is in memory (the bare open() was never closed).
#wildfires_data is now a list of text lines; remove_spaces() only iterates over
#lines, so it accepts this list exactly like the open file object.
with open('wildfires.txt', 'r') as wildfires_file:
    wildfires_data = wildfires_file.readlines()


#Creating a function which splits each line of the text dataset into its whitespace-separated values, discarding any unneeded spaces
def remove_spaces(input_file):
    """Split each line of ``input_file`` into its whitespace-separated fields.

    Parameters
    ----------
    input_file : iterable of str
        Any iterable that yields text lines (an open text file, a list of
        strings, ...).

    Returns
    -------
    list of list of str
        One inner list per non-blank line, holding that line's fields with all
        surrounding and repeated whitespace removed.
    """
    #str.split() with no arguments collapses runs of whitespace and strips the
    #line ends. Blank/whitespace-only lines split to an empty list and are
    #skipped so they cannot turn into empty rows downstream.
    split_lines = (line.split() for line in input_file)
    return [fields for fields in split_lines if fields]

#Creating the required column names of the dataset
column_names = ['fire', 'year', 'temp', 'humidity', 'rainfall', 'drought_code', 'buildup_index', 'day', 'month', 'wind_speed']

#Applying the function to remove spaces and creating the dataframe using pandas and giving the appropriate column names
wildfires_df = pd.DataFrame(remove_spaces(wildfires_data), columns = column_names)

#Dropping the first row of the dataframe as it contains column names.
#The explicit copy() makes the result an independent dataframe, so assigning
#the encoded 'fire' column below does not write into a slice of the original
#frame (which triggers pandas' SettingWithCopyWarning).
wildfires_df = wildfires_df.iloc[1:].copy()

#Creating feature columns: every column except the 'fire' class label
features_columns = [name for name in column_names if name != 'fire']

#creating labelEncoder
label_encoder = preprocessing.LabelEncoder()

#Label encoding the fire column (text labels are replaced by integer codes)
wildfires_df['fire'] = label_encoder.fit_transform(wildfires_df['fire'])

#Creating X and y dataframes with required features and classes from wildfires dataset
X = wildfires_df[features_columns]
y = wildfires_df['fire']


In [2]:
#Creating Logistic Regression Classifier
class LogisticRegressionClassifier:
    """Binary logistic regression trained with batch gradient descent.

    Parameters
    ----------
    learning_rate : float, default 0.001
        Step size applied to every gradient update.
    number_of_iterations : int, default 1000
        Number of gradient descent updates performed by ``fit``.

    Attributes
    ----------
    weights : numpy.ndarray or None
        One coefficient per feature; ``None`` until ``fit`` has been called.
    bias : float or None
        Intercept term; ``None`` until ``fit`` has been called.
    """

    def __init__(self, learning_rate=0.001, number_of_iterations=1000):
        self.learning_rate = learning_rate
        self.number_of_iterations = number_of_iterations
        #Weights and bias are unknown until fit() computes them
        self.weights = None
        self.bias = None

    def sigmoid(self, x):
        """Return the logistic function 1 / (1 + exp(-x)) element-wise.

        The value is computed from exp(-|x|), which always lies in (0, 1], so
        large-magnitude inputs cannot overflow the exponential.
        """
        x = np.asarray(x, dtype=float)
        decay = np.exp(-np.abs(x))
        #For x >= 0: 1 / (1 + e^-x).  For x < 0 the same value is rewritten as
        #e^x / (1 + e^x), and e^x equals decay there.
        result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
        #Indexing with () turns a 0-d result back into a scalar and leaves
        #arrays with one or more dimensions unchanged.
        return result[()]

    def gradient_descent(self, n_sample, p_score, actual, Z):
        """Apply one gradient descent update to ``weights`` and ``bias``.

        Parameters
        ----------
        n_sample : int
            Number of training samples (used to average the gradient).
        p_score : numpy.ndarray
            Predicted probabilities for the training samples.
        actual : numpy.ndarray
            True labels of the training samples.
        Z : numpy.ndarray
            Training feature matrix, one row per sample.
        """
        #Prediction error of every sample, shared by both derivatives
        error = p_score - actual

        #Calculating the derivative of weights and bias
        dw = (1 / n_sample) * np.dot(Z.T, error)
        db = (1 / n_sample) * np.sum(error)

        #Adjusting the weights and bias with respect to learning rate
        self.weights = self.weights - self.learning_rate * dw
        self.bias = self.bias - self.learning_rate * db

    def fit(self, train_data, target_labels):
        """Learn ``weights`` and ``bias`` from the training data.

        Parameters
        ----------
        train_data : array-like, 2-D
            Feature matrix with one row per sample; converted to a float array.
        target_labels : array-like
            One label per sample; converted to a flat float array.
        """
        #Coercing the inputs so lists / dataframes work, and flattening the
        #labels so a column vector cannot broadcast against the 1-D scores
        train_data = np.asarray(train_data, dtype=float)
        target_labels = np.asarray(target_labels, dtype=float).ravel()

        #Initializing the parameters
        number_of_samples = train_data.shape[0]
        number_of_features = train_data.shape[1]
        self.weights = np.zeros(number_of_features)
        self.bias = 0

        #Each iteration scores the training data with the current parameters
        #and then moves the parameters one step along the negative gradient
        for _ in range(self.number_of_iterations):

            #Linear model: dot product of training features and weights, plus bias
            linear_model = np.dot(train_data, self.weights) + self.bias

            #Sigmoid of the linear model gives the probability scores
            probability_scores = self.sigmoid(linear_model)

            #Calling Gradient Descent function
            self.gradient_descent(number_of_samples, probability_scores, target_labels, train_data)

    def predict_class(self, X):
        """Predict a 0/1 class for every row of ``X``.

        Parameters
        ----------
        X : array-like
            Feature matrix with the same number of columns used in ``fit``.

        Returns
        -------
        numpy.ndarray
            Integer array holding 1 where the probability score is greater
            than 0.5 and 0 otherwise.
        """
        X = np.asarray(X, dtype=float)

        #Linear model: dot product of X and weights, plus bias
        linear_model = np.dot(X, self.weights) + self.bias

        #Sigmoid of the linear model gives the probability scores
        probability_scores = self.sigmoid(linear_model)

        #Vectorised threshold: scores > 0.5 become 1, everything else 0
        return (probability_scores > 0.5).astype(int)



#Defining the accuracy function to calculate the accuracy of the built model
def accuracy_score(actual_class, predicted_class):
    """Return the fraction of predictions that equal the actual labels.

    Parameters
    ----------
    actual_class : array-like
        True labels.
    predicted_class : array-like
        Predicted labels, one per true label.

    Returns
    -------
    numpy.float64
        Number of matching positions divided by the number of labels.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of labels.
    """
    #Converting to flat arrays so the comparison is always element-wise:
    #two plain lists would otherwise compare as a single bool, and a column
    #vector against a flat array would broadcast to a matrix.
    actual = np.asarray(actual_class).ravel()
    predicted = np.asarray(predicted_class).ravel()
    if actual.shape != predicted.shape:
        raise ValueError(
            "actual_class and predicted_class must contain the same number of labels"
        )
    return np.sum(actual == predicted) / len(actual)


#Experiment settings kept in one place so they are easy to find and tune
NUMBER_OF_RUNS = 10            #number of independent train/test splits
TEST_FRACTION = 0.33           #share of the dataset held out for testing
BASE_SEED = 42                 #run k uses seed BASE_SEED + k, so every run is repeatable
BUILD_LEARNING_RATE = 0.0001   #learning rate of the implemented classifier
BUILD_ITERATIONS = 10000       #gradient descent iterations of the implemented classifier
SKLEARN_MAX_ITER = 1000        #iteration cap for the scikit-learn solver

#Building list to store accuracy of every run and calculating mean accuracy score
build_model_accuracy_list = []
scikitlearn_model_accuracy_list = []

#NOTE(review): the features are handed to both classifiers without any scaling
#step; this may contribute to the implemented model trailing scikit-learn - worth
#confirming by standardising the features.
for i in range(NUMBER_OF_RUNS):

    print("Iteration Number:",i+1)

    #Splitting the dataset into train and test datasets; a different but fixed
    #seed per run keeps the splits varied while making the results reproducible
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=TEST_FRACTION, shuffle=True, random_state=BASE_SEED + i
    )

    #Converting the train and test datasets into numpy float nd-arrays in order to facilitate matrix multiplications
    X_train, X_test = X_train.values.astype(float), X_test.values.astype(float)
    y_train, y_test = y_train.values.astype(float), y_test.values.astype(float)

    #Creating a classifier object using the implemented(built) Logistic Regression classifier
    logistic_regression_build_model = LogisticRegressionClassifier(
        learning_rate=BUILD_LEARNING_RATE, number_of_iterations=BUILD_ITERATIONS
    )

    #Calling the fit function to train the model with the train datasets
    logistic_regression_build_model.fit(X_train, y_train)

    #Predicting the class on test dataset using the model
    build_predictions = logistic_regression_build_model.predict_class(X_test)

    #Calculating the accuracy score (as a percentage) with accuracy function
    build_model_accuracy = (accuracy_score(y_test, build_predictions))*100

    #Printing the accuracy in each iteration
    print("Implemented Logistic Regression Classifier model accuracy:", build_model_accuracy)

    #Appending the accuracy of each iteration in the respective list
    build_model_accuracy_list.append(build_model_accuracy)


    #Creating Logistic Regression classifier object using Sk-learn Logistic Regression classifier
    scikitlearn_logistic_regression_model = LogisticRegression(solver='lbfgs', max_iter=SKLEARN_MAX_ITER)

    #Training Logistic Regression Classifier
    logisticregression_clf = scikitlearn_logistic_regression_model.fit(X_train, y_train)

    #Predicting the response from test dataset
    scikitlearn_model_predictions = logisticregression_clf.predict(X_test)

    #Calculating, printing and storing the scikit-learn accuracy (as a percentage)
    scikitlearn_model_accuracy =  (metrics.accuracy_score(y_test, scikitlearn_model_predictions))*100
    print("Scikit-Learn Logistic Regression Classifier model accuracy:", scikitlearn_model_accuracy,"\n")
    scikitlearn_model_accuracy_list.append(scikitlearn_model_accuracy)


print("Mean Accuracy of Implemented Logistic Regression Classifier model accuracy: ", np.mean(build_model_accuracy_list))

print("Mean Accuracy of Scikit-Learn Logistic Regression Classifier model accuracy: ", np.mean(scikitlearn_model_accuracy_list))

#Storing the actual and predicted labels in an output file.
#y_test and build_predictions still hold the values of the LAST run only,
#so the file describes that final train/test split.
actual_labels_and_predicted_labels = pd.DataFrame({'Actual Label': y_test, 'Predicted Label': build_predictions})
actual_labels_and_predicted_labels.to_csv("label_outputs.csv", index = False)

Iteration Number: 1
Implemented Logistic Regression Classifier model accuracy: 58.82352941176471
Scikit-Learn Logistic Regression Classifier model accuracy: 80.88235294117648 

Iteration Number: 2
Implemented Logistic Regression Classifier model accuracy: 85.29411764705883
Scikit-Learn Logistic Regression Classifier model accuracy: 86.76470588235294 

Iteration Number: 3
Implemented Logistic Regression Classifier model accuracy: 75.0
Scikit-Learn Logistic Regression Classifier model accuracy: 89.70588235294117 

Iteration Number: 4
Implemented Logistic Regression Classifier model accuracy: 67.64705882352942
Scikit-Learn Logistic Regression Classifier model accuracy: 88.23529411764706 

Iteration Number: 5
Implemented Logistic Regression Classifier model accuracy: 69.11764705882352
Scikit-Learn Logistic Regression Classifier model accuracy: 88.23529411764706 

Iteration Number: 6
Implemented Logistic Regression Classifier model accuracy: 70.58823529411765
Scikit-Learn Logistic Regressio