In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the glacier dataset from its CSV file and preview the first rows.
csv_path = "/content/glacier_logistic_regression_7col.csv"
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Year,Temp,Precip,Radiation,Altitude,TempPrecipRatio,Status
0,1995,-2.1,785.7,1207.1,3468.3,-0.003,Slow
1,1996,-3.2,1017.9,1187.9,3330.4,-0.003,Slow
2,1997,-2.9,892.6,1144.2,3378.7,-0.003,Slow
3,1998,-2.5,742.8,1082.3,3356.3,-0.003,Slow
4,1999,-2.6,909.1,1028.8,3407.4,-0.003,Slow


In [3]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Year             30 non-null     int64  
 1   Temp             30 non-null     float64
 2   Precip           30 non-null     float64
 3   Radiation        30 non-null     float64
 4   Altitude         30 non-null     float64
 5   TempPrecipRatio  30 non-null     float64
 6   Status           30 non-null     object 
dtypes: float64(5), int64(1), object(1)
memory usage: 1.8+ KB


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Year,Temp,Precip,Radiation,Altitude,TempPrecipRatio
count,30.0,30.0,30.0,30.0,30.0,30.0
mean,2009.5,-0.856667,870.633333,1166.023333,3393.126667,-0.000967
std,8.803408,1.394245,89.296794,83.449831,93.286026,0.001629
min,1995.0,-3.2,742.8,1028.8,3217.7,-0.004
25%,2002.25,-2.075,783.075,1116.925,3334.625,-0.002
50%,2009.5,-0.8,877.8,1179.7,3396.9,-0.001
75%,2016.75,0.225,923.2,1200.5,3460.175,-0.0
max,2024.0,1.5,1094.8,1458.2,3608.3,0.002


In [5]:
# Count the missing values in every column.
df.isna().sum()

Unnamed: 0,0
Year,0
Temp,0
Precip,0
Radiation,0
Altitude,0
TempPrecipRatio,0
Status,0


In [6]:
# Show only the rows whose Status is "Fast".
df.loc[df["Status"].eq("Fast")]

Unnamed: 0,Year,Temp,Precip,Radiation,Altitude,TempPrecipRatio,Status
16,2011,-0.1,776.6,1194.9,3419.5,-0.0,Fast
20,2015,0.3,924.4,1075.0,3457.1,0.0,Fast
21,2016,-0.3,851.2,1191.2,3217.7,-0.0,Fast
23,2018,0.7,826.2,1191.2,3340.4,0.001,Fast
24,2019,0.7,1094.8,1458.2,3476.7,0.001,Fast
25,2020,0.9,906.3,1195.7,3328.7,0.001,Fast
26,2021,1.0,772.9,1240.8,3389.7,0.001,Fast
27,2022,0.8,753.6,1226.3,3445.4,0.001,Fast
28,2023,1.3,893.4,1202.1,3477.9,0.001,Fast
29,2024,1.5,829.9,1124.8,3292.0,0.002,Fast


In [7]:
# Show only the rows with a negative Temp value.
df.loc[df["Temp"].lt(0)]

Unnamed: 0,Year,Temp,Precip,Radiation,Altitude,TempPrecipRatio,Status
0,1995,-2.1,785.7,1207.1,3468.3,-0.003,Slow
1,1996,-3.2,1017.9,1187.9,3330.4,-0.003,Slow
2,1997,-2.9,892.6,1144.2,3378.7,-0.003,Slow
3,1998,-2.5,742.8,1082.3,3356.3,-0.003,Slow
4,1999,-2.6,909.1,1028.8,3407.4,-0.003,Slow
5,2000,-2.9,762.3,1114.3,3608.3,-0.004,Slow
6,2001,-2.1,920.8,1218.5,3231.9,-0.002,Slow
7,2002,-2.4,954.3,1167.1,3461.8,-0.003,Slow
8,2003,-1.6,776.1,1050.3,3254.9,-0.002,Normal
9,2004,-2.0,936.7,1163.9,3357.5,-0.002,Slow


In [8]:
from sklearn.preprocessing import LabelEncoder

# Encode the Status strings as integer codes, overwriting the column in place.
#
# Guard against re-running this cell: once Status already holds integers, a
# fresh fit would learn classes_ == [0, 1, 2] and discard the original label
# names, so the previously fitted encoder is kept instead. On the first run
# (no encoder defined yet) the column is always fitted and transformed.
already_encoded = (
    "label_encoder" in globals()
    and pd.api.types.is_numeric_dtype(df["Status"])
)
if not already_encoded:
    label_encoder = LabelEncoder()
    df["Status"] = label_encoder.fit_transform(df["Status"])

In [9]:
# Class names in encoded order: the label at position i was mapped to integer i.
label_encoder.classes_

array(['Fast', 'Normal', 'Slow'], dtype=object)

In [10]:
# Preview the frame again now that Status holds integer codes.
df.head()

Unnamed: 0,Year,Temp,Precip,Radiation,Altitude,TempPrecipRatio,Status
0,1995,-2.1,785.7,1207.1,3468.3,-0.003,2
1,1996,-3.2,1017.9,1187.9,3330.4,-0.003,2
2,1997,-2.9,892.6,1144.2,3378.7,-0.003,2
3,1998,-2.5,742.8,1082.3,3356.3,-0.003,2
4,1999,-2.6,909.1,1028.8,3407.4,-0.003,2


In [11]:
from sklearn.model_selection import train_test_split

# Target: the encoded Status column. Features: every remaining column.
y = df["Status"]
x = df.drop(columns=["Status"])

In [13]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [14]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, then apply that
# same transformation to both splits so the test rows never influence the fit.
scaler = StandardScaler()
scaler.fit(xtrain)
xtrain = scaler.transform(xtrain)
xtest = scaler.transform(xtest)

In [15]:
from sklearn.linear_model import LogisticRegression

# Train a logistic-regression classifier with scikit-learn's default settings
# on the scaled training features.
model = LogisticRegression()
model.fit(X=xtrain, y=ytrain)

In [16]:
# Predict the encoded Status class for every held-out row.
ypred = model.predict(X=xtest)


In [17]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Compare the held-out predictions with the true labels.
accuracy = accuracy_score(ytest, ypred)
conf_matrix = confusion_matrix(ytest, ypred)
class_report = classification_report(ytest, ypred)

# Print each result under its heading.
for heading, value in (
    ("Model Accuracy:", accuracy),
    ("Confusion Matrix:\n", conf_matrix),
    ("Classification Report:\n", class_report),
):
    print(heading, value)

Model Accuracy: 0.6666666666666666
Confusion Matrix:
 [[2 0 0]
 [0 1 1]
 [0 1 1]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       0.50      0.50      0.50         2
           2       0.50      0.50      0.50         2

    accuracy                           0.67         6
   macro avg       0.67      0.67      0.67         6
weighted avg       0.67      0.67      0.67         6



In [18]:
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier with default hyperparameters, trained and
# evaluated on the same scaled split as the previous model.
knn = KNeighborsClassifier()
knn.fit(xtrain, ytrain)

# Overwrites the earlier predictions held in `ypred`.
ypred = knn.predict(xtest)
print(classification_report(ytest, ypred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       0.67      1.00      0.80         2
           2       1.00      0.50      0.67         2

    accuracy                           0.83         6
   macro avg       0.89      0.83      0.82         6
weighted avg       0.89      0.83      0.82         6



In [19]:
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier

# Decision-tree classifier trained and evaluated on the same scaled split.
# The seed is fixed because scikit-learn randomly permutes the candidate
# features at every split, so an unseeded tree (and its report) can change
# from one run to the next. 42 matches the seed used for the train/test split.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(xtrain, ytrain)

# Overwrites the earlier predictions held in `ypred`.
ypred = dt.predict(xtest)
print(classification_report(ytest, ypred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       0.33      0.50      0.40         2
           2       0.00      0.00      0.00         2

    accuracy                           0.50         6
   macro avg       0.44      0.50      0.47         6
weighted avg       0.44      0.50      0.47         6

