In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import pandas as pd

In [None]:
# Load the event records from the CSV export and print the frame's
# column / dtype / non-null summary.
csv_path = '/content/event_gen_1.1.csv'
df = pd.read_csv(csv_path)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 281 entries, 0 to 280
Data columns (total 14 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Month                 281 non-null    int64  
 1   Year                  281 non-null    int64  
 2   Country               281 non-null    object 
 3   Latitude              281 non-null    float64
 4   Longitude             281 non-null    float64
 5   Purpose of gathering  281 non-null    object 
 6   Fatalities            281 non-null    int64  
 7   Injured               281 non-null    int64  
 8   Crowd size            281 non-null    int64  
 9   entrance              281 non-null    int64  
 10  exit                  281 non-null    int64  
 11  A                     281 non-null    int64  
 12  B                     281 non-null    int64  
 13  other                 281 non-null    int64  
dtypes: float64(2), int64(10), object(2)
memory usage: 30.9+ KB


In [None]:
# Distinct values of the gathering-purpose column, in order of first appearance.
pd.unique(df['Purpose of gathering'])

array(['Religious', 'Entertainment', 'Sport', 'Other/Unknown',
       'Political', 'Transportation', 'Educational', 'Donation',
       'Shopping', 'Application'], dtype=object)

In [None]:
from sklearn.preprocessing import LabelEncoder


def _label_encode_column(frame, column):
    """Replace ``frame[column]`` with integer label codes, in place.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame holding the categorical column; it is modified in place.
    column : str
        Name of the column to encode.

    Returns
    -------
    (LabelEncoder, dict)
        The fitted encoder and a ``{label: code}`` mapping with built-in
        ``int`` codes (printable and JSON-serialisable as-is).

    Raises
    ------
    RuntimeError
        If the column is already numeric, i.e. this cell was re-run on an
        already encoded frame. Encoding again would overwrite the real
        label mapping with a meaningless code -> code mapping.
    """
    if pd.api.types.is_numeric_dtype(frame[column]):
        raise RuntimeError(
            f"Column {column!r} is already numeric; reload df before "
            "re-running the label-encoding cell."
        )
    encoder = LabelEncoder()
    frame[column] = encoder.fit_transform(frame[column])
    # A label's code is its position in the fitted `classes_` array.
    mapping = {label: int(code) for code, label in enumerate(encoder.classes_)}
    return encoder, mapping


# Encode 'Purpose of gathering' and 'Country', each with its own encoder so
# that fitting the second column does not discard the first column's encoder.
purpose_encoder, event_type_mapping = _label_encode_column(df, 'Purpose of gathering')
print("Event type mapping:", event_type_mapping)

country_encoder, country_mapping = _label_encode_column(df, 'Country')
print("Country mapping:", country_mapping)

# Kept for backward compatibility: previously a single shared encoder was used
# and ended up fitted on 'Country'.
label_encoder = country_encoder

Event type mapping: {'Application': 0, 'Donation': 1, 'Educational': 2, 'Entertainment': 3, 'Other/Unknown': 4, 'Political': 5, 'Religious': 6, 'Shopping': 7, 'Sport': 8, 'Transportation': 9}
Country mapping: {'Afghanistan': 0, 'Algeria': 1, 'Angola': 2, 'Argentina': 3, 'Australia': 4, 'Austria': 5, 'Bangladesh': 6, 'Belarus': 7, 'Belgium': 8, 'Benin': 9, 'Brazil': 10, 'Bulgaria': 11, 'Burkina Faso': 12, 'Cambodia': 13, 'Cameroon': 14, 'Chile': 15, 'China': 16, 'Colombia': 17, 'DRC': 18, 'Denmark': 19, 'Egypt': 20, 'Ethiopia': 21, 'France': 22, 'Germany': 23, 'Ghana': 24, 'Greece': 25, 'Guatemala': 26, 'Guinea': 27, 'Haiti': 28, 'Honduras': 29, 'Hong Kong': 30, 'Hungary': 31, 'India': 32, 'Indonesia': 33, 'Iran': 34, 'Iraq': 35, 'Italy': 36, 'Ivory Coast': 37, 'Japan': 38, 'Kenya': 39, 'Liberia': 40, 'Madagascar': 41, 'Malawi': 42, 'Malaysia': 43, 'Mali': 44, 'Malta': 45, 'Mexico': 46, 'Morocco': 47, 'Nepal': 48, 'Netherlands': 49, 'Nigeria': 50, 'North Korea': 51, 'Pakistan': 52, 'Per

In [None]:
import json

# Coerce every code to a built-in int before serialising.
event_type_mapping = dict(
    (label, int(code)) for label, code in event_type_mapping.items()
)
country_mapping = dict(
    (label, int(code)) for label, code in country_mapping.items()
)

# Write each mapping to its own JSON file in the working directory.
for filename, mapping in (
    ('event_type_mapping.json', event_type_mapping),
    ('country_mapping.json', country_mapping),
):
    with open(filename, 'w') as f:
        json.dump(mapping, f)

print("Mappings saved as JSON!")

Mappings saved as JSON!


In [None]:
# Preview the first five rows after label encoding.
df.head(n=5)

Unnamed: 0,Month,Year,Country,Latitude,Longitude,Purpose of gathering,Fatalities,Injured,Crowd size,entrance,exit,A,B,other
0,9,1902,69,33.513,-86.894,6,115,80,2000,111,141,729,653,366
1,1,1908,71,53.554,-1.479,3,16,40,45492,11788,12295,7054,8352,6003
2,12,1913,69,47.2484,-88.4553,3,73,0,45492,13495,13311,7181,7754,3751
3,2,1914,71,53.411389,-1.500556,8,0,75,43000,10668,9358,9940,9619,3415
4,12,1929,71,55.846,-4.423,3,71,0,600,163,175,114,111,37


In [None]:
# Discard the coordinate columns.
df = df.drop(columns=['Longitude', 'Latitude'])

In [None]:
# Print the column / dtype summary again to confirm the frame's current schema.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 281 entries, 0 to 280
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype
---  ------                --------------  -----
 0   Month                 281 non-null    int64
 1   Year                  281 non-null    int64
 2   Country               281 non-null    int64
 3   Purpose of gathering  281 non-null    int64
 4   Fatalities            281 non-null    int64
 5   Injured               281 non-null    int64
 6   Crowd size            281 non-null    int64
 7   entrance              281 non-null    int64
 8   exit                  281 non-null    int64
 9   A                     281 non-null    int64
 10  B                     281 non-null    int64
 11  other                 281 non-null    int64
dtypes: int64(12)
memory usage: 26.5 KB


In [None]:
# Predictor columns used as model inputs.
features = [
    'Month',
    'Year',
    'Country',
    'Purpose of gathering',
    'Crowd size',
]

# Columns to be predicted.
targets = [
    'Fatalities',
    'Injured',
    'entrance',
    'exit',
    'A',
    'B',
    'other',
]

In [None]:
# Separate the predictor columns from the target columns, then hold out 20% of
# the rows for evaluation (fixed seed so the split is repeatable).
X, y = df[features], df[targets]

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Fit one linear regressor per target column, keep the fitted model and its
# test-set predictions, and report MSE and R^2 on the held-out rows.
models, predictions = {}, {}

for target in targets:
    regressor = LinearRegression().fit(X_train, y_train[target])
    models[target] = regressor
    predictions[target] = regressor.predict(X_test)

    actual, predicted = y_test[target], predictions[target]
    print(f"Target: {target}")
    print(f"Mean Squared Error: {mean_squared_error(actual, predicted):.2f}")
    print(f"R^2 Score: {r2_score(actual, predicted):.2f}\n")

Target: Fatalities
Mean Squared Error: 4013.17
R^2 Score: -0.15

Target: Injured
Mean Squared Error: 22686.06
R^2 Score: -0.13

Target: entrance
Mean Squared Error: 336171479.36
R^2 Score: 0.48

Target: exit
Mean Squared Error: 451706859.98
R^2 Score: 0.36

Target: A
Mean Squared Error: 287483667.30
R^2 Score: 0.74

Target: B
Mean Squared Error: 261685652.06
R^2 Score: 0.75

Target: other
Mean Squared Error: 79171466.91
R^2 Score: 0.64



In [None]:
# Single-row frame describing a hypothetical event; 'Country' and
# 'Purpose of gathering' are given as integer label codes.
new_data = pd.DataFrame(
    [[9, 2024, 70, 0, 10000]],
    columns=['Month', 'Year', 'Country', 'Purpose of gathering', 'Crowd size'],
)


In [None]:
# Ask every per-target model for its prediction on the single new_data row.
predictions_new = {
    target: model.predict(new_data)[0] for target, model in models.items()
}

print(predictions_new)

{'Fatalities': 53.21722782746099, 'Injured': -104.09028977640673, 'entrance': 1368.4610406721768, 'exit': 3709.690676675178, 'A': 1574.6156730921357, 'B': 1781.8796211289591, 'other': 1565.3529884337331}


In [None]:
import joblib

# Persist each fitted model to '<target>_model.joblib' in the working directory.
for target, model in models.items():
    model_path = f'{target}_model.joblib'
    joblib.dump(model, model_path)
    print(f"Saved model for {target} to {model_path}")


Saved model for Fatalities to Fatalities_model.joblib
Saved model for Injured to Injured_model.joblib
Saved model for entrance to entrance_model.joblib
Saved model for exit to exit_model.joblib
Saved model for A to A_model.joblib
Saved model for B to B_model.joblib
Saved model for other to other_model.joblib


**New set of models**

In [None]:
from sklearn.model_selection import train_test_split

# Features (independent variables): every column except the two casualty counts.
X = df.drop(columns=['Fatalities', 'Injured'])

# Target variables (dependent variables)
y_fatalities = df['Fatalities']
y_injured = df['Injured']

# Split once, passing both targets to the same call. train_test_split applies
# one set of row indices to every array it is given, so X_train / X_test are
# guaranteed to line up with both targets. The previous version split twice
# and overwrote X_train / X_test, which was only consistent because both calls
# happened to share the same random_state.
(
    X_train, X_test,
    y_train_fatalities, y_test_fatalities,
    y_train_injured, y_test_injured,
) = train_test_split(X, y_fatalities, y_injured, test_size=0.2, random_state=42)


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Fit a linear regressor for the 'Fatalities' target on the training rows.
model_fatalities = LinearRegression().fit(X_train, y_train_fatalities)

# Predict the held-out rows and score them with MSE and R^2.
y_pred_fatalities = model_fatalities.predict(X_test)
mse_fatalities, r2_fatalities = (
    metric(y_test_fatalities, y_pred_fatalities)
    for metric in (mean_squared_error, r2_score)
)

print(f'Fatalities - MSE: {mse_fatalities}, R2 Score: {r2_fatalities}')

Fatalities - MSE: 5502.764440868818, R2 Score: -0.5781827948974458


In [None]:
# Example new event details
new_event = {
    'Month': 9,
    'Year': 2024,
    'Country': 70,
    'Purpose of gathering': 0,
    'Crowd size': 10000,
    'entrance': 3,
    'exit': 2,
    'A': 1,
    'B': 2,
    'other': 1
}


new_event_df = pd.DataFrame([new_event])
new_event_encoded = pd.get_dummies(new_event_df, columns=['Country', 'Purpose of gathering'])
new_event_encoded = new_event_encoded.reindex(columns=X.columns, fill_value=0)
predicted_fatalities = model_fatalities.predict(new_event_encoded)
print(f"Predicted Fatalities: {predicted_fatalities[0]}")

Predicted Fatalities: 17.787709570832362
