#### Libraries

In [1]:
import pandas as pd
import json
from datetime import datetime, time
import matplotlib.pyplot as plt
import re

#### Data

In [2]:
# Load the raw historical launch records; the path is relative to the
# notebook's working directory.
# NOTE(review): pd.read_json defaults to convert_dates=True, which may try to
# auto-parse a column named 'date'. The parsing cell further down calls string
# methods on that column — confirm it is still text after loading.
historical_launches_df = pd.read_json("../data/raw/launch/historical_launches.json")

In [3]:
def split_column(df, original_column, new_column1, new_column2, separator='|'):
    """
    Split a string column into two new columns at the first separator.

    The text before the first occurrence of ``separator`` is written to
    ``new_column1`` and everything after it to ``new_column2``; both parts are
    stripped of surrounding whitespace. The input DataFrame is left untouched.
    If one of the new names equals ``original_column``, that column is
    overwritten in the returned copy.

    Parameters:
    df (pandas.DataFrame): The DataFrame containing the column to be split.
    original_column (str): The name of the column to be split.
    new_column1 (str): The name for the first new column.
    new_column2 (str): The name for the second new column.
    separator (str): Literal text to split on (not a regular expression).
        Defaults to '|'.

    Returns:
    pandas.DataFrame: A copy of ``df`` with the two new columns added. Rows
    whose value is missing or contains no separator get a missing value in
    ``new_column2``.
    """
    # Work on a copy so the caller's DataFrame is never modified
    df_copy = df.copy()

    # n=1 splits at the first separator only, so later separators stay in the
    # second part. regex=False keeps multi-character separators literal; the
    # pandas default would interpret them as regular expressions.
    split_data = df_copy[original_column].str.split(
        separator, expand=True, n=1, regex=False
    )

    def _part(position):
        # expand=True creates only as many columns as the longest split, so
        # the second part is absent when no row contains the separator (and
        # even the first can be absent for an empty frame).
        if position in split_data.columns:
            return split_data[position].str.strip()
        return pd.Series(None, index=df_copy.index, dtype=object)

    df_copy[new_column1] = _part(0)
    df_copy[new_column2] = _part(1)

    return df_copy

# Split 'location' at the first '|': the text before it becomes 'pad', and the
# text after it overwrites 'location' (new_column2 reuses the original name).
# NOTE: this reassigns historical_launches_df from itself, so the cell is not
# safe to re-run on the already-split frame — reload the data first.
historical_launches_df = split_column(
    df=historical_launches_df,
    original_column='location',
    new_column1='pad',
    new_column2='location'
)

In [4]:
def parse_launch_date(date_string):
    """
    Parse a launch date/time string into a ``datetime``.

    Periods are removed, the text is upper-cased (so "a.m." becomes "AM") and
    surrounding whitespace is trimmed. The following layouts are then tried in
    order: ``%m/%d/%Y %I:%M %p``, ``%m/%d/%Y %I %p``, ``%m/%d/%Y %I%p`` and
    ``%m/%d/%Y %p``. If none match, the words MIDNIGHT and NOON are read as
    12:00 AM and 12:00 PM respectively.

    Parameters:
    date_string (str or datetime): The raw date text. A value that is already
        a ``datetime`` (including pandas Timestamps) is returned unchanged, so
        applying this function to an already-parsed column does not wipe it.

    Returns:
    datetime or None: The parsed value, or None when the input is not a string
    or matches no supported layout. A message is printed in the None case;
    no exception is raised.
    """
    # Already parsed (e.g. the cell was run twice): pass the value through.
    if isinstance(date_string, datetime):
        return date_string

    # Missing values and other non-strings cannot be parsed; report and skip.
    if not isinstance(date_string, str):
        print(
            f"Error parsing date: {date_string}. "
            f"Error: expected a string, got {type(date_string).__name__}"
        )
        return None

    cleaned = date_string.replace('.', '').upper().strip()

    try:
        for fmt in ('%m/%d/%Y %I:%M %p', '%m/%d/%Y %I %p', '%m/%d/%Y %I%p', '%m/%d/%Y %p'):
            try:
                return datetime.strptime(cleaned, fmt)
            except ValueError:
                # Not this layout; try the next one.
                continue

        # MIDNIGHT is checked before NOON; only the first word found is used.
        for word, clock in (('MIDNIGHT', '12:00 AM'), ('NOON', '12:00 PM')):
            if word in cleaned:
                return datetime.strptime(cleaned.replace(word, clock), '%m/%d/%Y %I:%M %p')

        raise ValueError(f"Unable to parse date string: {cleaned}")
    except ValueError as e:
        # Best-effort parsing: report the failure and let the caller continue.
        print(f"Error parsing date: {cleaned}. Error: {str(e)}")
        return None

# Replace the raw 'date' values with the result of parse_launch_date, applied
# element by element. Values it cannot parse come back as None, and the
# function prints a message for each of them.
historical_launches_df['date'] = historical_launches_df['date'].apply(parse_launch_date)

In [5]:
# Most recent parsed launch timestamp — shows how far the data extends.
historical_launches_df['date'].max()

Timestamp('2024-08-06 02:42:00')

In [6]:
# Keep only launches whose 'location' text mentions 'USA' (case-insensitive);
# rows with a missing location are treated as non-matches.
usa_launches = historical_launches_df.loc[
    historical_launches_df['location'].str.contains('USA', case=False, na=False)
]
usa_launches.shape[0]

2103

In [12]:
# Count launches per (location, status) pair, with 0 where a combination never
# occurs, then append a per-location 'Total' column summing all statuses.
status_per_location = (
    usa_launches
    .pivot_table(index='location', columns='status', aggfunc='size', fill_value=0)
    .assign(Total=lambda counts: counts.sum(axis=1))
)

# Busiest launch sites first
status_per_location_sorted = status_per_location.sort_values(by='Total', ascending=False)

# Show the ranked table as the cell output
status_per_location_sorted

status,Launch Failure,Launch Successful,Launch was a Partial Failure,Total
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"Cape Canaveral, FL, USA",75,887,5,967
"Vandenberg SFB, CA, USA",67,695,3,765
"Kennedy Space Center, FL, USA",2,238,1,241
"Wallops Island, Virginia, USA",7,68,3,78
"Corn Ranch, Van Horn, TX, USA",1,24,0,25
"SpaceX Starbase, TX, USA",2,8,3,13
"Pacific Spaceport Complex, Alaska, USA",4,5,0,9
"Spaceport America, NM, USA",1,0,3,4
"Kauai, USA",1,0,0,1


```python
# One tuple per US launch location: (location label, latitude, longitude).
# The labels are spelled exactly like the 'location' values in the status
# table above. Coordinates are presumably decimal degrees with negative
# longitude meaning west — TODO confirm the source and precision.
spaceport_coordinates = [
    ("Cape Canaveral, FL, USA", 28.4889, -80.5778),
    ("Vandenberg SFB, CA, USA", 34.7420, -120.5724),
    ("Wallops Island, Virginia, USA", 37.9401, -75.4664),
    ("Kennedy Space Center, FL, USA", 28.5728, -80.6490),
    ("Pacific Spaceport Complex, Alaska, USA", 57.4356, -152.3378),
    ("Corn Ranch, Van Horn, TX, USA", 31.4233, -104.7587),
    ("Kauai, USA", 22.0379, -159.7567),
    ("Spaceport America, NM, USA", 32.9903, -106.9750),
    ("SpaceX Starbase, TX, USA", 25.9971, -97.1554)
]
```

In [None]:
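# Optional export (disabled): uncomment the next line to write the filtered
# USA launches to the transformed-data folder as CSV.
# usa_launches.to_csv('../data/transformed/launch/usa_launches.csv', index=False)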