In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

In [2]:
df = pd.read_csv('datasets/us_accidents_expanded.csv')

## Machine Learning

### Humidity and Severity

In [3]:
df = df.dropna(subset=['Humidity(%)', 'Severity'])

X = df[['Humidity(%)']] # We can change the feature and target variables according to our purpose
y = df['Severity']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression(max_iter=100)
model.fit(X_train, y_train)

# Predictions and evaluation
y_pred = model.predict(X_test)
print("Classification Report of Humidity and Severity:")
print(classification_report(y_test, y_pred))


Classification Report of Humidity and Severity:
              precision    recall  f1-score   support

           1       0.00      0.00      0.00       825
           2       0.80      1.00      0.89     77873
           3       0.00      0.00      0.00     16530
           4       0.00      0.00      0.00      2546

    accuracy                           0.80     97774
   macro avg       0.20      0.25      0.22     97774
weighted avg       0.63      0.80      0.71     97774



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Visibility and Severity

In [4]:
df = df.dropna(subset=['Visibility(mi)', 'Severity'])

X = df[['Visibility(mi)']] # We can change the feature and target variables according to our purpose
y = df['Severity']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression(max_iter=100)
model.fit(X_train, y_train)

# Predictions and evaluation
y_pred = model.predict(X_test)
print("Classification Report of Visibility and Severity:")
print(classification_report(y_test, y_pred))

Classification Report of Visibility and Severity:
              precision    recall  f1-score   support

           1       0.00      0.00      0.00       807
           2       0.80      1.00      0.89     77492
           3       0.00      0.00      0.00     16458
           4       0.00      0.00      0.00      2514

    accuracy                           0.80     97271
   macro avg       0.20      0.25      0.22     97271
weighted avg       0.63      0.80      0.71     97271



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Distance and Severity

In [5]:
df = df.dropna(subset=['Distance(mi)', 'Severity'])

X = df[['Distance(mi)']] # We can change the feature and target variables according to our purpose
y = df['Severity']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression(max_iter=100)
model.fit(X_train, y_train)

# Predictions and evaluation
y_pred = model.predict(X_test)
print("Classification Report of Distance and Severity:")
print(classification_report(y_test, y_pred))

Classification Report of Distance and Severity:
              precision    recall  f1-score   support

           1       0.00      0.00      0.00       807
           2       0.80      1.00      0.89     77492
           3       0.00      0.00      0.00     16458
           4       0.08      0.00      0.00      2514

    accuracy                           0.80     97271
   macro avg       0.22      0.25      0.22     97271
weighted avg       0.64      0.80      0.71     97271



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Temperature and Severity

In [6]:
df = df.dropna(subset=['Temperature(F)', 'Severity'])

X = df[['Temperature(F)']] # We can change the feature and target variables according to our purpose
y = df['Severity']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression(max_iter=100)
model.fit(X_train, y_train)

# Predictions and evaluation
y_pred = model.predict(X_test)
print("Classification Report of Temperature and Severity:")
print(classification_report(y_test, y_pred))

Classification Report of Temperature and Severity:
              precision    recall  f1-score   support

           1       0.00      0.00      0.00       807
           2       0.80      1.00      0.89     77492
           3       0.00      0.00      0.00     16458
           4       0.00      0.00      0.00      2514

    accuracy                           0.80     97271
   macro avg       0.20      0.25      0.22     97271
weighted avg       0.63      0.80      0.71     97271



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Wind_Chill and Severity

In [7]:
df = df.dropna(subset=['Wind_Chill(F)', 'Severity'])

X = df[['Wind_Chill(F)']] # We can change the feature and target variables according to our purpose
y = df['Severity']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression(max_iter=100)
model.fit(X_train, y_train)

# Predictions and evaluation
y_pred = model.predict(X_test)
print("Classification Report of Wind_Chill and Severity:")
print(classification_report(y_test, y_pred))

Classification Report of Wind_Chill and Severity:
              precision    recall  f1-score   support

           1       0.00      0.00      0.00       815
           2       0.84      1.00      0.91     62028
           3       0.00      0.00      0.00      9162
           4       0.00      0.00      0.00      1838

    accuracy                           0.84     73843
   macro avg       0.21      0.25      0.23     73843
weighted avg       0.71      0.84      0.77     73843



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Pressure and Severity

In [8]:
df = df.dropna(subset=['Pressure(in)', 'Severity'])

X = df[['Pressure(in)']] # We can change the feature and target variables according to our purpose
y = df['Severity']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression(max_iter=100)
model.fit(X_train, y_train)

# Predictions and evaluation
y_pred = model.predict(X_test)
print("Classification Report of Pressure and Severity:")
print(classification_report(y_test, y_pred))

Classification Report of Pressure and Severity:
              precision    recall  f1-score   support

           1       0.00      0.00      0.00       861
           2       0.84      1.00      0.91     61910
           3       0.00      0.00      0.00      9132
           4       0.00      0.00      0.00      1886

    accuracy                           0.84     73789
   macro avg       0.21      0.25      0.23     73789
weighted avg       0.70      0.84      0.77     73789



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Wind_Speed and Severity

In [9]:
df = df.dropna(subset=['Wind_Speed(mph)', 'Severity'])

X = df[['Wind_Speed(mph)']] # We can change the feature and target variables according to our purpose
y = df['Severity']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression(max_iter=100)
model.fit(X_train, y_train)

# Predictions and evaluation
y_pred = model.predict(X_test)
print("Classification Report of Wind_Speed and Severity:")
print(classification_report(y_test, y_pred))

Classification Report of Wind_Speed and Severity:
              precision    recall  f1-score   support

           1       0.00      0.00      0.00       861
           2       0.84      1.00      0.91     61910
           3       1.00      0.00      0.00      9132
           4       0.00      0.00      0.00      1886

    accuracy                           0.84     73789
   macro avg       0.46      0.25      0.23     73789
weighted avg       0.83      0.84      0.77     73789



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Precipitation and Severity

In [10]:
df = df.dropna(subset=['Precipitation(in)', 'Severity'])

X = df[['Precipitation(in)']] # We can change the feature and target variables according to our purpose
y = df['Severity']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression(max_iter=100)
model.fit(X_train, y_train)

# Predictions and evaluation
y_pred = model.predict(X_test)
print("Classification Report of Precipitation and Severity:")
print(classification_report(y_test, y_pred))

Classification Report of Precipitation and Severity:
              precision    recall  f1-score   support

           1       0.00      0.00      0.00       792
           2       0.85      1.00      0.92     57966
           3       0.00      0.00      0.00      7517
           4       0.00      0.00      0.00      1628

    accuracy                           0.85     67903
   macro avg       0.21      0.25      0.23     67903
weighted avg       0.73      0.85      0.79     67903



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
