In [29]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [30]:
data = pd.read_csv("spacex_cleaned.csv")

In [25]:
data = data.dropna(subset = ["success"])

In [26]:
data["payload_mass"] = data["payload_mass"].fillna(data.groupby('rocket_name')["payload_mass"].transform('mean'))

In [27]:
data['success'] = data['success'].astype(int)
data

Unnamed: 0,mission_name,launch_date,rocket_name,payload_mass,orbit,site_name,location,success,weather_condition
0,FalconSat,2006-03-24T22:30:00.000Z,Falcon 1,20.000000,LEO,Kwajalein Atoll,Omelek Island,0,Clear
1,DemoSat,2007-03-21T01:10:00.000Z,Falcon 1,128.333333,LEO,Kwajalein Atoll,Omelek Island,0,Clear
2,Trailblazer,2008-08-03T03:34:00.000Z,Falcon 1,128.333333,LEO,Kwajalein Atoll,Omelek Island,0,Clear
3,RatSat,2008-09-28T23:15:00.000Z,Falcon 1,165.000000,LEO,Kwajalein Atoll,Omelek Island,1,Clear
4,RazakSat,2009-07-13T03:35:00.000Z,Falcon 1,200.000000,LEO,Kwajalein Atoll,Omelek Island,1,Clear
...,...,...,...,...,...,...,...,...,...
182,Starlink 4-20 (v1.5) & Sherpa LTC-2/Varuna-TDM,2022-09-05T02:09:00.000Z,Falcon 9,13260.000000,VLEO,CCSFS SLC 40,Cape Canaveral,1,Clear
183,Starlink 4-2 (v1.5) & Blue Walker 3,2022-09-11T01:10:00.000Z,Falcon 9,13260.000000,VLEO,KSC LC 39A,Cape Canaveral,1,Clear
184,Starlink 4-34 (v1.5),2022-09-17T01:05:00.000Z,Falcon 9,13260.000000,VLEO,CCSFS SLC 40,Cape Canaveral,1,Clear
185,Starlink 4-35 (v1.5),2022-09-24T23:30:00.000Z,Falcon 9,13260.000000,VLEO,CCSFS SLC 40,Cape Canaveral,1,Clear


In [28]:
data.to_csv('filename.csv', index=False)

In [None]:
import pandas as pd
import requests

# Load your data
df = pd.read_csv('data.csv')

# Caching launchpad details
launchpad_cache = {}

def get_launchpad_info(launchpad_id):
    if launchpad_id in launchpad_cache:
        return launchpad_cache[launchpad_id]
    
    url = f'https://api.spacexdata.com/v4/launchpads/{launchpad_id}'
    res = requests.get(url)
    if res.status_code == 200:
        data = res.json()
        info = {
            'latitude': data['latitude'],
            'longitude': data['longitude'],
            'name': data['name'],
            'region': data['region']
        }
        launchpad_cache[launchpad_id] = info
        return info
    else:
        print(f"⚠️ Failed to get launchpad: {launchpad_id}")
        return None

# Prepare output columns
weather_list = []
temp_list = []
wind_list = []

# Go row by row
for idx, row in df.iterrows():
    try:
        mission = row['name']
        launch_date = row['date_utc'].split('T')[0]
        launchpad_id = row['launchpad']
        
        info = get_launchpad_info(launchpad_id)
        if info is None:
            raise Exception("Missing launchpad info.")

        lat = info['latitude']
        lon = info['longitude']
        site_name = info['name']
        site_region = info['region']
        
        # Open-Meteo API call
        weather_url = (
            f"https://archive-api.open-meteo.com/v1/archive"
            f"?latitude={lat}&longitude={lon}"
            f"&start_date={launch_date}&end_date={launch_date}"
            f"&daily=temperature_2m_max,temperature_2m_min,windspeed_10m_max"
            f"&timezone=UTC"
        )
        
        res = requests.get(weather_url)
        if res.status_code != 200:
            raise Exception("Weather fetch error")

        weather_data = res.json()
        daily = weather_data.get('daily', {})
        
        if 'temperature_2m_max' in daily and daily['temperature_2m_max']:
            temp_max = daily['temperature_2m_max'][0]
            temp_min = daily['temperature_2m_min'][0]
            wind = int(round(daily['windspeed_10m_max'][0]))
            temp = int(round((temp_max + temp_min) / 2))
            
            # Simulated condition (you can improve this with actual precipitation data)
            condition = "Light rain" if wind > 15 or temp < 20 else "Clear"

            # Store values
            weather_list.append(condition)
            temp_list.append(temp)
            wind_list.append(wind)

            # Print progress
            print(f"Mission: {mission}")
            print(f"Launch Date: {launch_date}")
            print(f"Launch Site: {site_name}, {site_region}")
            print(f"Weather: {condition}, Temp: {temp}°C, Wind: {wind} km/h\n")

        else:
            raise Exception("Incomplete weather data")

    except Exception as e:
        print(f"❌ Failed to fetch weather for {row['name']} on {row['date_utc'][:10]}: {e}")
        weather_list.append("Unavailable")
        temp_list.append(None)
        wind_list.append(None)

# Add new columns
df['Weather'] = weather_list
df['Temperature'] = temp_list
df['Wind'] = wind_list

# Save updated CSV
df.to_csv("data_with_weather.csv", index=False)
print("✅ All done. Weather data saved to 'data_with_weather.csv'")


In [53]:
import pandas as pd

df = pd.read_csv('spacex_final.csv')
categorical_cols = ['Weather', 'site_name', 'location', 'rocket_name', 'orbit']

dummies = pd.get_dummies(df[categorical_cols], prefix=categorical_cols)
df = pd.concat([df, dummies], axis=1)
df = pd.concat([df.drop(columns=categorical_cols), dummies], axis=1)
df.to_csv('spacex_final.csv', index=False)

In [190]:
dropable_data = ["success", "mission_name", "launch_date", "payload_mass"]
X = df.drop(dropable_data, axis=1)
y = df['success']

In [191]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [198]:
from sklearn.linear_model import LinearRegression, SGDRegressor
model = LinearRegression()

In [199]:
model.fit(X_train, y_train)

In [200]:
from sklearn.metrics import mean_squared_error, r2_score

y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

print(r2_score(y_train, y_train_pred)*100)
print(r2_score(y_test, y_test_pred)*100)

36.9309395768157
0.0


In [213]:
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor

# Random Forest model
model = RandomForestRegressor(n_estimators=2, random_state=42)

# Cross-validation with R² scoring
cv_scores = cross_val_score(model, X, y, cv=80, scoring='r2')
cv_scores1 = cross_val_score(model, X_test, y_test, cv=19, scoring='r2')
cv_scores2 = cross_val_score(model, X_train, y_train, cv=38, scoring='r2')

# Output results
print(f"R² Score: {np.mean(cv_scores) * 100}%")
print(f"R² Score: {np.mean(cv_scores1) * 100}%")
print(f"R² Score: {np.mean(cv_scores2) * 100}%")

R² Score: 91.25%
R² Score: 100.0%
R² Score: 77.19298245614036%
