In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
%matplotlib inline

In [2]:
# Load the 'planets' example dataset through seaborn's dataset loader.
planets = sns.load_dataset('planets')

In [3]:
# Dimensions of the DataFrame as (rows, columns).
planets.shape

(1035, 6)

In [4]:
# Preview the first five rows to see the available columns.
planets.head()

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.3,7.1,77.4,2006
1,Radial Velocity,1,874.774,2.21,56.95,2008
2,Radial Velocity,1,763.0,2.6,19.84,2011
3,Radial Velocity,1,326.03,19.4,110.62,2007
4,Radial Velocity,1,516.22,10.5,119.47,2009


In [5]:
# Average planet mass over the rows that have a mass value
# (missing entries are dropped before averaging).
planets['mass'].dropna().mean()

2.6381605847953233

In [6]:
# Smallest recorded planet mass; min() skips missing values on its own,
# so no explicit dropna() step is needed.
planets['mass'].min()

0.0035999999999999999

In [7]:
# Number of planets that have a recorded mass
# (count() tallies non-null entries only).
planets['mass'].count()

513

In [8]:
# Average distance over the rows that have a distance value.
planets['distance'].dropna().mean()

264.06928217821786

In [9]:
# Average of the year column, after dropping any missing entries.
planets['year'].dropna().mean()

2009.0705314009663

In [10]:
# Summary statistics for the numeric columns, computed only on rows that
# have no missing values (dropna() removes any row containing a null).
planets.dropna().describe()

Unnamed: 0,number,orbital_period,mass,distance,year
count,498.0,498.0,498.0,498.0,498.0
mean,1.73494,835.778671,2.50932,52.068213,2007.37751
std,1.17572,1469.128259,3.636274,46.596041,4.167284
min,1.0,1.3283,0.0036,1.35,1989.0
25%,1.0,38.27225,0.2125,24.4975,2005.0
50%,1.0,357.0,1.245,39.94,2009.0
75%,2.0,999.6,2.8675,59.3325,2011.0
max,6.0,17337.5,25.0,354.0,2014.0


In [11]:
# Let's figure out what changed in the method between 2005 and 2009.
# Grouping on its own only returns a GroupBy object; an aggregation has to
# follow before any values are shown.
planets.groupby('method')

<pandas.core.groupby.DataFrameGroupBy object at 0x10ae9fe10>

In [12]:
# Selecting one column from the grouped frame gives a SeriesGroupBy object,
# still without any aggregated values.
planets.groupby('method')['year']

<pandas.core.groupby.SeriesGroupBy object at 0x10aed8668>

In [13]:
# Mean year for each detection method.
planets.groupby('method')['year'].mean()

method
Astrometry                       2011.500000
Eclipse Timing Variations        2010.000000
Imaging                          2009.131579
Microlensing                     2009.782609
Orbital Brightness Modulation    2011.666667
Pulsar Timing                    1998.400000
Pulsation Timing Variations      2007.000000
Radial Velocity                  2007.518987
Transit                          2011.236776
Transit Timing Variations        2012.500000
Name: year, dtype: float64

In [14]:
# Number of year entries recorded for each method, i.e. how many planets
# each method accounts for.
planets.groupby('method')['year'].count()

method
Astrometry                         2
Eclipse Timing Variations          9
Imaging                           38
Microlensing                      23
Orbital Brightness Modulation      3
Pulsar Timing                      5
Pulsation Timing Variations        1
Radial Velocity                  553
Transit                          397
Transit Timing Variations          4
Name: year, dtype: int64

In [15]:
# Year in which the first planet was discovered with each method.
planets.groupby('method')['year'].min()

method
Astrometry                       2010
Eclipse Timing Variations        2008
Imaging                          2004
Microlensing                     2004
Orbital Brightness Modulation    2011
Pulsar Timing                    1992
Pulsation Timing Variations      2007
Radial Velocity                  1989
Transit                          2002
Transit Timing Variations        2011
Name: year, dtype: int64

In [16]:
# Earliest and mean year for each detection method, side by side.
# The aggregations are named by string rather than passed as NumPy callables:
# recent pandas releases emit a FutureWarning for np.min/np.mean here, and the
# string form always labels the columns 'min' and 'mean' (the callable form
# labelled the first column 'amin' when this notebook was originally run).
planets.groupby('method')['year'].aggregate(['min', 'mean'])

Unnamed: 0_level_0,amin,mean
method,Unnamed: 1_level_1,Unnamed: 2_level_1
Astrometry,2010,2011.5
Eclipse Timing Variations,2008,2010.0
Imaging,2004,2009.131579
Microlensing,2004,2009.782609
Orbital Brightness Modulation,2011,2011.666667
Pulsar Timing,1992,1998.4
Pulsation Timing Variations,2007,2007.0
Radial Velocity,1989,2007.518987
Transit,2002,2011.236776
Transit Timing Variations,2011,2012.5


In [17]:
# Full set of summary statistics for the year column, one row per method.
planets.groupby('method')['year'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Astrometry,2.0,2011.5,2.12132,2010.0,2010.75,2011.5,2012.25,2013.0
Eclipse Timing Variations,9.0,2010.0,1.414214,2008.0,2009.0,2010.0,2011.0,2012.0
Imaging,38.0,2009.131579,2.781901,2004.0,2008.0,2009.0,2011.0,2013.0
Microlensing,23.0,2009.782609,2.859697,2004.0,2008.0,2010.0,2012.0,2013.0
Orbital Brightness Modulation,3.0,2011.666667,1.154701,2011.0,2011.0,2011.0,2012.0,2013.0
Pulsar Timing,5.0,1998.4,8.38451,1992.0,1992.0,1994.0,2003.0,2011.0
Pulsation Timing Variations,1.0,2007.0,,2007.0,2007.0,2007.0,2007.0,2007.0
Radial Velocity,553.0,2007.518987,4.249052,1989.0,2005.0,2009.0,2011.0,2014.0
Transit,397.0,2011.236776,2.077867,2002.0,2010.0,2012.0,2013.0,2014.0
Transit Timing Variations,4.0,2012.5,1.290994,2011.0,2011.75,2012.5,2013.25,2014.0


In [18]:
# Element-wise ratio of each planet's mass to its distance.
mass_per_distance = planets['mass'] / planets['distance']

In [19]:
# Check what kind of object dividing two columns produced.
type(mass_per_distance)

pandas.core.series.Series

In [20]:
# Preview the first five ratios.
mass_per_distance.head()

0    0.091731
1    0.038806
2    0.131048
3    0.175375
4    0.087888
dtype: float64

In [21]:
# Attach the ratio to the DataFrame as a new column named 'M/D'.
# Note this modifies `planets` in place, so later cells see the extra column.
planets['M/D'] = mass_per_distance

In [22]:
# Confirm the new 'M/D' column now appears in the frame.
planets.head()

Unnamed: 0,method,number,orbital_period,mass,distance,year,M/D
0,Radial Velocity,1,269.3,7.1,77.4,2006,0.091731
1,Radial Velocity,1,874.774,2.21,56.95,2008,0.038806
2,Radial Velocity,1,763.0,2.6,19.84,2011,0.131048
3,Radial Velocity,1,326.03,19.4,110.62,2007,0.175375
4,Radial Velocity,1,516.22,10.5,119.47,2009,0.087888


In [23]:
# Orbital period expressed as a multiple of 365 (e.g. 0.5 means half of 365).
# NOTE(review): despite the name, this is a plain ratio and is not scaled by 100.
# It presumably assumes orbital_period is measured in days — confirm against the
# dataset documentation.
percentage_of_earth_year = planets['orbital_period'] / 365

In [24]:
# Display the series; pandas elides the middle rows of long output.
percentage_of_earth_year

0        0.737808
1        2.396641
2        2.090411
3        0.893233
4        1.414301
5        0.509151
6        4.858630
7        2.187671
8        2.721370
9        1.240548
10       2.419178
11       0.918082
12       1.312603
13       2.953425
14       6.550685
15      38.361644
16       0.011591
17       0.040140
18       0.121589
19      13.449315
20       0.002018
21       0.715616
22       0.011548
23       0.104167
24       0.337014
25       0.319694
26       1.895616
27       2.610137
28       0.496986
29            NaN
          ...    
1005     0.010120
1006     0.012235
1007     0.012650
1008     0.007778
1009     0.013746
1010     0.021697
1011     0.011795
1012     0.010564
1013     0.012088
1014     0.011995
1015     0.004310
1016     0.006333
1017     0.011195
1018     0.012642
1019     0.007955
1020     0.006073
1021     0.006806
1022     0.003726
1023     0.005959
1024     0.010034
1025     0.008405
1026     0.002536
1027          NaN
1028     0.009184
1029      