In [2]:
# Import Dependencies
import pandas as pd

In [3]:
# Load the raw UFO sightings CSV. low_memory=False asks pandas to parse the
# file without its chunked low-memory mode.
ufo_df = pd.read_csv('Resources/ufoSightings.csv', low_memory=False)

# Keep only the rows that have a value in every column.
clean_ufo_df = ufo_df.dropna()

# Build a separate frame whose "duration (seconds)" column is cast to float;
# clean_ufo_df itself is left unchanged.
converted_ufo_df = clean_ufo_df.astype({"duration (seconds)": float})

# Preview the first rows of the converted frame.
converted_ufo_df.head()

Unnamed: 0,datetime,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,4/27/2004,29.8830556,-97.941111
3,10/10/1956 21:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.9783333,-96.645833
4,10/10/1960 20:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.4180556,-157.803611
5,10/10/1961 19:00,bristol,tn,us,sphere,300.0,5 minutes,My father is now 89 my brother 52 the girl wit...,4/27/2007,36.595,-82.188889
7,10/10/1965 23:45,norwalk,ct,us,disk,1200.0,20 minutes,A bright orange color changing to reddish colo...,10/2/1999,41.1175,-73.408333


### Single Index (review)

In [4]:
# Mean sighting duration (seconds) per country, rounded to one decimal place.
converted_ufo_df.pivot_table(
    index='country',
    values='duration (seconds)',
    aggfunc='mean',
).round(1)

Unnamed: 0_level_0,duration (seconds)
country,Unnamed: 1_level_1
au,252.5
ca,29175.3
gb,8343.6
us,5527.4


In [6]:
# Average duration (seconds) per country, laid out with one column per
# country and rounded to one decimal place.
converted_ufo_df_se = converted_ufo_df.pivot_table(
    columns='country',
    values='duration (seconds)',
    aggfunc='mean',
).round(1)

# Display the table with a friendlier row label. The renamed frame is not
# assigned back, so converted_ufo_df_se keeps its original label.
converted_ufo_df_se.rename({'duration (seconds)': 'Duration: Avg. Seconds'}, axis='index')

country,au,ca,gb,us
Duration: Avg. Seconds,252.5,29175.3,8343.6,5527.4


### Multi-Indexing

In [None]:
# Disabled scratch code, kept for reference only.
# Show the average seconds for each country and state and round to one decimal place (groupby form):
# ufo_country_state = converted_ufo_df.groupby(by=['country','state'])[['duration (seconds)']].mean().round(1)

# Equivalent pivot_table form (result not assigned):
# pd.pivot_table(converted_ufo_df,
#                index=['country','state'],
#                values='duration (seconds)',
#                aggfunc='mean').round(1)
# Show the table.
# ufo_country_state.head(20)

In [8]:
# Average duration (seconds) for every (country, state) pair, computed with
# groupby and rounded to one decimal place.
(
    converted_ufo_df
    .groupby(['country', 'state'])
    .agg({'duration (seconds)': 'mean'})
    .round(1)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,duration (seconds)
country,state,Unnamed: 2_level_1
au,al,900.0
au,dc,300.0
au,nt,180.0
au,oh,180.0
au,sa,152.5
...,...,...
us,vt,1042.5
us,wa,15273.5
us,wi,1928.4
us,wv,6791.9


In [11]:
# Average duration (seconds) for every (country, state) pair, built as a
# pivot table with a two-level row index and rounded to one decimal place.
ufo_country_state = converted_ufo_df.pivot_table(
    index=['country', 'state'],
    values='duration (seconds)',
    aggfunc='mean',
).round(1)

# Display the resulting table.
ufo_country_state

Unnamed: 0_level_0,Unnamed: 1_level_0,duration (seconds)
country,state,Unnamed: 2_level_1
au,al,900.0
au,dc,300.0
au,nt,180.0
au,oh,180.0
au,sa,152.5
...,...,...
us,vt,1042.5
us,wa,15273.5
us,wi,1928.4
us,wv,6791.9


In [12]:
# List the column labels of the converted frame. Note that the rendered
# output shows the last label as 'longitude ' with a trailing space.
converted_ufo_df.columns

Index(['datetime', 'city', 'state', 'country', 'shape', 'duration (seconds)',
       'duration (hours/min)', 'comments', 'date posted', 'latitude',
       'longitude '],
      dtype='object')

In [16]:
# Count UFO sightings per (country, state, city) by counting the "shape"
# entries in each group.
ufo_country_state_city = converted_ufo_df.pivot_table(
    index=['country', 'state', 'city'],
    values='shape',
    aggfunc='count',
).round(1)

# Display the resulting table.
ufo_country_state_city

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,shape
country,state,city,Unnamed: 3_level_1
au,al,melbourne (australia),1
au,dc,maroochydore (queensland) (australia),1
au,nt,darwin (nt&#44 australia),2
au,oh,adelaide (south australia),1
au,sa,adelaide (south australia),1
...,...,...,...
us,wy,thermopolis,1
us,wy,torrington,1
us,wy,worland,2
us,wy,wyoming (i-80&#44 westbound),1


In [18]:
# Show the number of UFO sightings for each country, state, and city.
# `sort=True` (the pivot_table default) orders the rows by the index keys
# (country, state, city); it does not rank rows by count, and `sort=False`
# would not sort in descending order either. Ranking by the number of
# sightings requires a separate `sort_values` call on the result.
# The counts are integers, so no rounding is applied.
ufo_country_state_city = pd.pivot_table(converted_ufo_df, index=['country', 'state', 'city'],
                                        values='shape', aggfunc='count', sort=True)

# Show the first 20 rows of the table.
ufo_country_state_city.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,shape
country,state,city,Unnamed: 3_level_1
au,al,melbourne (australia),1
au,dc,maroochydore (queensland) (australia),1
au,nt,darwin (nt&#44 australia),2
au,oh,adelaide (south australia),1
au,sa,adelaide (south australia),1
au,sa,port adelaide (south australia),1
au,wa,cue (western australia) (australia),1
au,wa,perth (western australia),1
au,yt,port macquarie (australia),1
ca,ab,airdrie (canada),10


In [21]:
# Label the count column "UFO Sightings" instead of "shape".
ufo_country_state_city = ufo_country_state_city.rename({'shape': "UFO Sightings"}, axis='columns')

# Rank the (country, state, city) groups from most to fewest sightings and
# show the top 20.
ufo_country_state_city.sort_values('UFO Sightings', ascending=False).head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,UFO Sightings
country,state,city,Unnamed: 3_level_1
us,wa,seattle,471
us,az,phoenix,434
us,nv,las vegas,352
us,ca,los angeles,347
us,ca,san diego,327
us,or,portland,313
us,tx,houston,289
us,il,chicago,256
us,az,tucson,237
us,fl,miami,222


### Multi-Index and Multi-Aggregations

In [23]:
# Shortest and longest sighting duration (seconds) for every (country, state)
# pair. The aggregations are passed as a tuple; the rendered output has flat
# `max` and `min` columns.
ufo_country_state_minmax = converted_ufo_df.pivot_table(
    index=['country', 'state'],
    values='duration (seconds)',
    aggfunc=('min', 'max'),
)

# Display the resulting table.
ufo_country_state_minmax

Unnamed: 0_level_0,Unnamed: 1_level_0,max,min
country,state,Unnamed: 2_level_1,Unnamed: 3_level_1
au,al,900.0,900.00
au,dc,300.0,300.00
au,nt,300.0,60.00
au,oh,180.0,180.00
au,sa,300.0,5.00
...,...,...,...
us,vt,109800.0,1.00
us,wa,52623200.0,0.01
us,wi,1209600.0,1.00
us,wv,2631600.0,1.00
