In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# ---------------------------------------------------------------------------
# 1. Load the raw launch records
# ---------------------------------------------------------------------------
file_path = 'dataset_falcon9.csv'
df = pd.read_csv(file_path)

# Parse 'Date' into datetimes. The column is not in `selected_features`, so it
# does not reach the model; the conversion only affects `df` itself.
df['Date'] = pd.to_datetime(df['Date'])

# ---------------------------------------------------------------------------
# 2. Build the feature matrix and the target vector
# ---------------------------------------------------------------------------
# Columns handed to the model. The ones also named in `categorical_features`
# are expanded into indicator columns; the others pass through unchanged.
selected_features = [
    'PayloadMass', 'Orbit', 'LaunchSite', 'Flights', 'GridFins',
    'Reused', 'Legs', 'Block', 'ReusedCount', 'Longitude', 'Latitude',
]
categorical_features = ['Orbit', 'LaunchSite', 'GridFins', 'Reused', 'Legs']

df_encoded = pd.get_dummies(
    df[selected_features],
    columns=categorical_features,
)
target = df['Class']

# ---------------------------------------------------------------------------
# 3. Hold out 30% of the rows for testing (seeded so the split is repeatable)
# ---------------------------------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    df_encoded,
    target,
    test_size=0.3,
    random_state=42,
)

# ---------------------------------------------------------------------------
# 4. Standardize: statistics come from the training split only and are then
#    applied to both splits
# ---------------------------------------------------------------------------
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ---------------------------------------------------------------------------
# 5. Fit the classifier and predict on the held-out rows
# ---------------------------------------------------------------------------
model = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# ---------------------------------------------------------------------------
# 6. Score the predictions against the true labels
# ---------------------------------------------------------------------------
accuracy = accuracy_score(y_test, y_pred)
class_report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Each heading is followed by its value on the next line.
print("Confusion Matrix:", conf_matrix, sep="\n")
print("\nClassification Report:", class_report, sep="\n")
print("\nAccuracy Score:", accuracy, sep="\n")

# ---------------------------------------------------------------------------
# 7. Peek at the scaled training features, relabelled with the column names
# ---------------------------------------------------------------------------
X_train_scaled_df = pd.DataFrame(X_train_scaled, columns=X_train.columns)
print("\nProcessed Data Sample:", X_train_scaled_df.head(), sep="\n")

# ---------------------------------------------------------------------------
# 8. Unscaled test features side by side with actual and predicted labels
# ---------------------------------------------------------------------------
# `assign` returns a new frame, so `X_test` itself is left untouched.
predictions_df = X_test.assign(Actual=y_test, Predicted=y_pred)
print("\nSample Predictions:", predictions_df.head(14), sep="\n")  # first 14 rows


Confusion Matrix:
[[ 4  3]
 [ 0 20]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.57      0.73         7
           1       0.87      1.00      0.93        20

    accuracy                           0.89        27
   macro avg       0.93      0.79      0.83        27
weighted avg       0.90      0.89      0.88        27


Accuracy Score:
0.8888888888888888

Processed Data Sample:
   PayloadMass   Flights     Block  ReusedCount  Longitude  Latitude  \
0     0.043923  0.873024  0.938963     0.269330   0.434784 -0.439082   
1    -0.742706 -0.676286 -0.333180    -0.336663   0.432966 -0.418219   
2    -0.894347  0.098369  0.938963     0.875324   0.434784 -0.439082   
3    -0.385901 -0.676286 -1.605324    -0.942656   0.434784 -0.439082   
4     1.931667  1.647679  0.938963     1.481317   0.434784 -0.439082   

   Orbit_ES-L1  Orbit_GEO  Orbit_GTO  Orbit_HEO  ...  Orbit_VLEO  \
0          0.0     -0.127   1.644957     -0.127  ..