In [None]:
from google.colab import drive

In [None]:
# Mount Google Drive under /content/drive; the dataset path used later in this
# notebook points inside that mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Repeats the mount call from the previous cell. Drive was already mounted, so
# this run only printed an "already mounted" notice.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# ---- Load the dataset from the mounted Drive ----
# NOTE(review): the path ends in ".csv" but is read with read_excel; confirm the
# file's real format before changing either the name or the reader.
file_path = '/content/drive/My Drive/Airplane_Crashes_and_Fatalities_Since_1990.csv'
df = pd.read_excel(file_path)

# Quick look at the first rows.
print(df.head())

# ---- Missing values ----
# Report how many entries are missing per column.
missing_per_column = df.isna().sum()
print("Missing values in each column:")
print(missing_per_column)

# Impute: each numeric column's gaps are filled with that column's mean.
# Non-numeric columns are excluded from the mean and therefore left as they are.
column_means = df.mean(numeric_only=True)
df = df.fillna(value=column_means)

# ---- Column dtypes ----
print("Data types of each column:")
print(df.dtypes)

# ---- Min-max scaling of the numeric columns ----
# Only numeric columns are selected, so columns such as dates are not scaled here.
numerical_cols = df.select_dtypes(include=[np.number]).columns
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df[numerical_cols])

# Wrap the scaled array back into a DataFrame carrying the original column labels.
scaled_df = pd.DataFrame(scaled_data, columns=numerical_cols)

print("Scaled data:")
print(scaled_df.head())

# Further preprocessing (feature engineering, categorical encoding, ...) can follow here.



        Date  Fatalities
0 1990-01-02         9.0
1 1990-01-13        27.0
2 1990-01-15        23.0
3 1990-01-17         1.0
4 1990-01-18         2.0
Missing values in each column:
Date          0
Fatalities    1
dtype: int64
Data types of each column:
Date          datetime64[ns]
Fatalities           float64
dtype: object
Scaled data:
   Fatalities
0    0.025788
1    0.077364
2    0.065903
3    0.002865
4    0.005731


In [None]:
# Show the column labels of the loaded frame so the target column name can be checked.
print(df.columns)


Index(['Date', 'Fatalities'], dtype='object')


In [None]:
# Separate the frame into features and target. The same two assignments are
# repeated at the top of the model-training cell.
X = df.drop(columns=['Fatalities'])  # Features: every column except the target
y = df['Fatalities']  # Target: the 'Fatalities' column


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Train and evaluate a small stacked-LSTM regressor that predicts 'Fatalities'
# from the remaining columns of `df`.
#
# Changes relative to the earlier version of this cell:
#   * the scaler is fitted on the training rows only (fitting on the full data
#     before splitting leaks test-set statistics into training);
#   * the input shape is declared with an `Input` layer instead of the
#     `input_shape=` argument, which Keras warns about on Sequential models;
#   * random seeds are fixed so repeated runs give the same losses.

# Extra imports needed only by this cell; kept local so the cell is self-contained.
from tensorflow.keras.layers import Input
from tensorflow.keras.utils import set_random_seed

# Seed the Python, NumPy and TensorFlow RNGs (weight init and batch shuffling).
set_random_seed(42)

# Replace 'Fatalities' with your actual target column name
X = df.drop(columns=['Fatalities'])  # Features
y = df['Fatalities']                 # Target variable

# Train-test split on the unscaled features. With the same random_state this
# selects the same rows as splitting the scaled array would.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale features: learn min/max from the training rows only, then apply the
# same transform to the test rows (test values may fall slightly outside [0, 1]).
# NOTE(review): the features include a datetime column; this relies on the
# scaler accepting it as numeric input, as it did before - confirm if sklearn is upgraded.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full scaled feature matrix, kept under its previous name for any later cell
# that still refers to it. It is not used for training or evaluation here.
X_scaled = scaler.transform(X)

# Reshape for LSTM: (samples, features) -> (samples, 1 time step, features)
X_train = np.expand_dims(X_train, axis=1)  # Adding a time dimension
X_test = np.expand_dims(X_test, axis=1)

# Define LSTM model: two stacked LSTM layers followed by a single-unit output.
model_lstm = Sequential()
model_lstm.add(Input(shape=(X_train.shape[1], X_train.shape[2])))
model_lstm.add(LSTM(50, return_sequences=True))  # emits the full sequence for the next LSTM
model_lstm.add(LSTM(50))
model_lstm.add(Dense(1))

# Compile the model
model_lstm.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model_lstm.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate the model (returns the mean-squared-error loss on the held-out rows)
loss = model_lstm.evaluate(X_test, y_test)
print("Test Loss:", loss)


  super().__init__(**kwargs)


Epoch 1/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - loss: 1966.3470
Epoch 2/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 2255.6372
Epoch 3/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1732.4312
Epoch 4/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1479.4742
Epoch 5/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1855.8694
Epoch 6/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1507.3713
Epoch 7/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1556.4221
Epoch 8/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1554.4370
Epoch 9/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1341.0629
Epoch 10/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/st

In [None]:
#RMSE Calculation
from sklearn.metrics import mean_squared_error
import math

# Run the trained model on the held-out features.
y_pred = model_lstm.predict(X_test)

# RMSE is the square root of the mean squared error between the true targets
# and the predictions.
mse = mean_squared_error(y_test, y_pred)
rmse = math.sqrt(mse)
print(f"RMSE: {rmse}")


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
RMSE: 35.34346733668333
