# 01: Data Loading and Preprocessing

This notebook handles:
- Loading ISRO missions and satellites datasets
- Cleaning column names (removing hidden characters)
- Converting date columns to datetime format
- Adding derived columns (e.g., year)
- Initial data inspection and validation


In [1]:
import pandas as pd

def load_missions_data(filepath):
    """Read the missions CSV and return a cleaned DataFrame.

    Steps applied, in order:
      1. Column names have non-breaking spaces (``\\xa0``) replaced by a
         regular space and surrounding whitespace removed.
      2. The ``Date`` column is parsed to datetimes; values that cannot be
         parsed become ``NaT`` (``errors='coerce'``).
      3. A ``year`` column is appended, taken from the parsed ``Date``.

    Parameters
    ----------
    filepath : str or path-like
        Location of the CSV file, handed directly to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with cleaned headers, a datetime ``Date`` column
        and the derived ``year`` column.

    Raises
    ------
    KeyError
        If no column is called ``Date`` once the headers are cleaned.
    """
    df = pd.read_csv(filepath)

    # Normalise the headers first so the 'Date' lookup below is reliable.
    headers = df.columns.str.replace('\xa0', ' ', regex=False)
    df.columns = headers.str.strip()

    # Parse once, then reuse the parsed series for the derived column.
    parsed_dates = pd.to_datetime(df['Date'], errors='coerce')
    df['Date'] = parsed_dates
    df['year'] = parsed_dates.dt.year
    return df

def load_satellites_data(filepath, date_format=None):
    """Read the satellites CSV and return a cleaned DataFrame.

    Column names get the same clean-up as in ``load_missions_data``:
    non-breaking spaces (``\\xa0``) become regular spaces and surrounding
    whitespace is stripped. The ``launch_date`` column is parsed to
    datetimes (unparseable values become ``NaT``) and a ``year`` column is
    derived from it.

    Parameters
    ----------
    filepath : str or path-like
        Location of the CSV file, handed directly to ``pd.read_csv``.
    date_format : str, optional
        ``strftime``-style pattern describing ``launch_date`` (for example
        ``'%d-%m-%Y'``), forwarded to ``pd.to_datetime(format=...)``.
        The default ``None`` leaves format detection to pandas, which is
        the previous behaviour of this function.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with cleaned headers, a datetime ``launch_date``
        column and the derived ``year`` column.

    Raises
    ------
    KeyError
        If no column is called ``launch_date`` once the headers are cleaned.
    """
    satellites = pd.read_csv(filepath)

    # A bare .strip() only removes leading/trailing whitespace, so a
    # non-breaking space *inside* a header would survive; replace it first.
    satellites.columns = (
        satellites.columns
        .str.replace('\xa0', ' ', regex=False)
        .str.strip()
    )

    # An explicit format removes ambiguity between day-first and
    # month-first strings; with format=None pandas guesses the layout.
    satellites['launch_date'] = pd.to_datetime(
        satellites['launch_date'],
        format=date_format,
        errors='coerce',
    )
    satellites['year'] = satellites['launch_date'].dt.year
    return satellites


In [2]:
# Load both datasets through the cleaning helpers defined in the previous cell.
missions = load_missions_data('../data/ISRO missions - till AUGUST 2022.csv')
satellites = load_satellites_data('../data/satellites.csv')

# Quick sanity check: (rows, columns) of each frame, one per line,
# followed by a preview of the first missions rows as the cell output.
print(missions.shape, satellites.shape, sep='\n')
missions.head()


(213, 7)
(53, 7)


  satellites['launch_date'] = pd.to_datetime(satellites['launch_date'], errors='coerce')


Unnamed: 0,Rank,Name,Date,launch vehicle,Application,orbital type,year
0,1,Aryabhata,1975-04-19,C-1 Intercosmos,Experimental,,1975
1,2,Bhaskara-I,1979-06-07,C-1Intercosmos,"Earth Observation, Experimental",LEO,1979
2,3,Rohini Technology Payload (RTP),1979-08-10,SLV-3E1,,,1979
3,4,SLV-3E1,1979-08-10,Rohini Technology Payload (RTP),Partially Successful,,1979
4,5,Rohini Satellite RS-1,1980-07-18,SLV-3E2,,,1980
