In [1]:
# Import packages
import pandas as pd
import numpy as np
from collections import defaultdict

In [2]:
# Load the parsed interplanetary missions table from the working directory.
df = pd.read_csv(filepath_or_buffer='interplanetary-parsed.csv')

In [3]:
# Attach a `country` column derived from each row's `agency` code.
# An agency code missing from the mapping raises KeyError.
agency2country = {
    'cnsa': 'China',
    'esa': 'EU',
    'isro': 'India',
    'jaxa': 'Japan',
    'nasa': 'USA',
    'roscosmos': 'Russia',
    'soviet': 'Soviet Union',
}
agencies = list(df['agency'])
# `countries` stays a plain list: the stacked-data export cell reuses it.
countries = list(map(agency2country.__getitem__, agencies))
df['country'] = countries

In [4]:
# Persist the country-enriched table; the DataFrame index is not written.
df.to_csv(
    path_or_buf='interplanetary-parsed-with-country.csv',
    index=False,
    index_label=False,
)

In [5]:
# Split the missions into one sub-frame per distinct value of the `to` column.
dests = list(np.unique(df['to']))
df_by_dest = {dest: df[df['to'] == dest] for dest in dests}

In [6]:
# Write one CSV per destination; '/' in a destination name is replaced by '-'
# so it can be used inside the output file name.
for dest, df_dest in df_by_dest.items():
    out_name = 'interplanetary-{}.csv'.format(dest.replace('/', '-'))
    df_dest.to_csv(out_name, index_label=False, index=False)

In [7]:
# Preview the first ten rows of the Mars sub-frame.
df_by_dest['mars'].head(10)

Unnamed: 0,name,success,type,from,to,object,launch,finish,cospar,agency,country
0,Mars 1M,False,flyby,earth,mars,planet,1960-10-10,1960-10-10,MARSNK1,soviet,Soviet Union
1,Mars 2M,False,towards,earth,mars,planet,1960-10-14,1960-10-14,MARSNK2,soviet,Soviet Union
8,Sputnik 22,False,towards,earth,mars,planet,1962-10-24,1962-10-29,,soviet,Soviet Union
9,Mars 1,False,towards,earth,mars,planet,1962-11-01,1963-03-21,,soviet,Soviet Union
10,Sputnik 24,False,towards,earth,mars,planet,1962-11-04,1962-11-25,,soviet,Soviet Union
16,Mariner 3,False,towards,earth,mars,planet,1964-11-05,1964-11-05,,nasa,USA
17,Mariner 4,True,flyby,earth,mars,planet,1964-11-28,1967-12-21,,nasa,USA
18,Zond 2,False,towards,earth,mars,planet,1964-11-30,1965-08-06,,soviet,Soviet Union
23,Unnamed,False,towards,earth,mars,planet,1967-03-27,1967-03-27,,soviet,Soviet Union
29,Mariner 6,True,flyby,earth,mars,planet,1969-02-25,,,nasa,USA


In [8]:
# Build the counts behind the stacked charts.
# Layout: stacked_data[dest][year][country] -> number of rows (missions)
# with that destination, launch year and country.
# Kept as nested defaultdicts so that missing keys read as 0.
stacked_data = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))

for dest, df_dest in df_by_dest.items():
    for launch_date, country in zip(df_dest['launch'], df_dest['country']):
        # `launch` is a 'YYYY-MM-DD' string; the year is the part before the first '-'.
        launch_year = int(launch_date.split('-')[0])
        stacked_data[dest][launch_year][country] += 1

In [None]:
# Save the stacked data: one file 'stacked-<dest>.csv' per destination.
# Each file has the header "date,<country>,..." followed by one row per year
# holding the per-country counts (0 where a country has no entry that year).
uniq_countries = np.unique(countries)
header = 'date,' + ','.join(uniq_countries) + '\n'

# Iterate over a snapshot of the items so the loop cannot be disturbed by
# keys being added to `stacked_data` (it is a defaultdict, so a mere lookup
# of a missing key would insert it).
for dest, counts_by_year in list(stacked_data.items()):
    # Only the file name uses the sanitised form ('/' -> '-'); the data is
    # always looked up under the original `dest` key. Indexing with the
    # sanitised name silently produced an empty file and grew the dict
    # mid-iteration for any destination containing '/'.
    dest_name = dest.replace('/', '-')
    with open('stacked-{}.csv'.format(dest_name), 'w') as f:
        f.write(header)
        # Emit years in ascending order so the rows form a proper time axis.
        for year in sorted(counts_by_year):
            counts = counts_by_year[year]
            # .get() reads the count without inserting zero entries.
            fields = ['%d' % year]
            fields.extend('%d' % counts.get(country, 0) for country in uniq_countries)
            f.write(','.join(fields) + '\n')

In [None]:
# Display the top-level keys of stacked_data (the destination names it was filled under).
stacked_data.keys()

In [None]:
# NOTE(review): `a` is not assigned in any cell visible here, so evaluating it
# on a fresh kernel would raise NameError. Looks like a leftover scratch cell —
# confirm and delete.
a