In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, mean_absolute_error
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from google.colab import files

# Upload file
uploaded = files.upload()

# Load dataset
df = pd.read_csv("earthquake_data.csv")
df.head()

# Convert alert labels to numbers
df['alert'] = df['alert'].replace({
    'green': 0,
    'yellow': 1,
    'orange': 2,
    'red': 3
})

# Drop non-numeric columns
df = df.drop(['place','location','type','title','net','id','time','updated'], axis=1, errors='ignore')

# Remove missing values
df = df.dropna()
print(df.isnull().sum())
print(df.shape)

# Split features & labels
X = df.drop("alert", axis=1)
y = df["alert"]

# Label encode if needed
le = LabelEncoder()
y = le.fit_transform(y)

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train models
log_model = LogisticRegression(max_iter=500)
dt_model = DecisionTreeClassifier()

log_model.fit(X_train, y_train)
dt_model.fit(X_train, y_train)

# Predictions
log_pred = log_model.predict(X_test)
dt_pred = dt_model.predict(X_test)

# Performance evaluation
print("========== MODEL PERFORMANCE ==========")
print("Logistic Regression Accuracy:", accuracy_score(y_test, log_pred))
print("Decision Tree Accuracy:", accuracy_score(y_test, dt_pred))

print("\nLogistic Regression MAE:", mean_absolute_error(y_test, log_pred))
print("Decision Tree MAE:", mean_absolute_error(y_test, dt_pred))


Saving earthquake_data.csv to earthquake_data (1).csv
magnitude    0
depth        0
cdi          0
mmi          0
sig          0
alert        0
dtype: int64
(1300, 6)
Logistic Regression Accuracy: 0.6346153846153846
Decision Tree Accuracy: 0.8846153846153846

Logistic Regression MAE: 0.4230769230769231
Decision Tree MAE: 0.15


  df['alert'] = df['alert'].replace({
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
