In [1]:
# Import Dependencies
import pandas as pd

In [3]:
# Read file into DataFrame
ufo_df = pd.read_csv('Resources/ufoSightings.csv', low_memory=False)

# Remove the rows with missing data
clean_ufo_df = ufo_df.dropna(how="any")

# Converting the "duration (seconds)" column's values to numeric
converted_ufo_df = clean_ufo_df.copy()
converted_ufo_df["duration (seconds)"] = converted_ufo_df.loc[:, "duration (seconds)"].astype(float)

# Display the DataFrame
converted_ufo_df.head()

Unnamed: 0,datetime,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,4/27/2004,29.8830556,-97.941111
3,10/10/1956 21:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.9783333,-96.645833
4,10/10/1960 20:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.4180556,-157.803611
5,10/10/1961 19:00,bristol,tn,us,sphere,300.0,5 minutes,My father is now 89 my brother 52 the girl wit...,4/27/2007,36.595,-82.188889
7,10/10/1965 23:45,norwalk,ct,us,disk,1200.0,20 minutes,A bright orange color changing to reddish colo...,10/2/1999,41.1175,-73.408333


In [4]:
converted_ufo_df.columns

Index(['datetime', 'city', 'state', 'country', 'shape', 'duration (seconds)',
       'duration (hours/min)', 'comments', 'date posted', 'latitude',
       'longitude '],
      dtype='object')

In [8]:
# Create a custom function that will calculate the average of DataFrame column
def custom_mean(x):
    return x.mean()

In [11]:
# Use the custom_avg function to show the average seconds for each country and state and round to one decimal place
df = pd.pivot_table(converted_ufo_df, index=['country', 'state'], 
                    values='duration (seconds)', aggfunc=custom_mean)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,duration (seconds)
country,state,Unnamed: 2_level_1
au,al,900.000000
au,dc,300.000000
au,nt,180.000000
au,oh,180.000000
au,sa,152.500000
...,...,...
us,vt,1042.462598
us,wa,15273.474357
us,wi,1928.422656
us,wv,6791.901826


In [12]:
# Rename the columns to reflect the results. 
df = df.rename(columns={'duration (seconds)' : "Avg. Seconds"})

df

Unnamed: 0_level_0,Unnamed: 1_level_0,Avg. Seconds
country,state,Unnamed: 2_level_1
au,al,900.000000
au,dc,300.000000
au,nt,180.000000
au,oh,180.000000
au,sa,152.500000
...,...,...
us,vt,1042.462598
us,wa,15273.474357
us,wi,1928.422656
us,wv,6791.901826


In [14]:
# Create two more custom functions. 
# 1) Returns the number of items from a DataFrame column.
def custom_count(x):
    return x.count()


# 2) Returns the total the total from a DataFrame column.
def custom_sum(x):
    return x.sum()


In [15]:
# Show the total number of sighting, and the avg and total number of seconds of UFOs
# for each country, state, and city.  
df = pd.pivot_table(converted_ufo_df, index=['country', 'state', 'city'], 
                    values='duration (seconds)', aggfunc=(custom_count, custom_mean, custom_sum))

# Display the top 25 results
df.head(25)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,custom_count,custom_mean,custom_sum
country,state,city,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
au,al,melbourne (australia),1,900.0,900.0
au,dc,maroochydore (queensland) (australia),1,300.0,300.0
au,nt,darwin (nt&#44 australia),2,180.0,360.0
au,oh,adelaide (south australia),1,180.0,180.0
au,sa,adelaide (south australia),1,300.0,300.0
au,sa,port adelaide (south australia),1,5.0,5.0
au,wa,cue (western australia) (australia),1,30.0,30.0
au,wa,perth (western australia),1,420.0,420.0
au,yt,port macquarie (australia),1,30.0,30.0
ca,ab,airdrie (canada),10,863.7,8637.0


In [16]:
# Get the column names 
df.columns

Index(['custom_count', 'custom_mean', 'custom_sum'], dtype='object')

In [17]:
# Rename the columns to reflect the results. 
df = df.rename(columns={'custom_count' : "Number of Sightings",
                        'custom_mean'  : "Avg Time of Sightings (seconds)",
                        "custom_sum"   : "Total Time Sighting (seconds)"})

# Display the top 20 results
df.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Number of Sightings,Avg Time of Sightings (seconds),Total Time Sighting (seconds)
country,state,city,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
au,al,melbourne (australia),1,900.0,900.0
au,dc,maroochydore (queensland) (australia),1,300.0,300.0
au,nt,darwin (nt&#44 australia),2,180.0,360.0
au,oh,adelaide (south australia),1,180.0,180.0
au,sa,adelaide (south australia),1,300.0,300.0
au,sa,port adelaide (south australia),1,5.0,5.0
au,wa,cue (western australia) (australia),1,30.0,30.0
au,wa,perth (western australia),1,420.0,420.0
au,yt,port macquarie (australia),1,30.0,30.0
ca,ab,airdrie (canada),10,863.7,8637.0


In [26]:
# Create a function the column value of a DataFrame if the value is greater than 20.
def custom_count(x):
    if x.count()>20:
        return x.count()

In [31]:
# Show the number of UFOs for each city, state, and country. 
df = pd.pivot_table(converted_ufo_df, index=['country', 'state', 'city'], 
                    values='duration (seconds)', aggfunc=(custom_count, custom_mean, custom_sum)).round(1)


# Display the results.
print(df.shape)

df.head(50)

(15569, 3)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,custom_count,custom_mean,custom_sum
country,state,city,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
au,al,melbourne (australia),,900.0,900.0
au,dc,maroochydore (queensland) (australia),,300.0,300.0
au,nt,darwin (nt&#44 australia),,180.0,360.0
au,oh,adelaide (south australia),,180.0,180.0
au,sa,adelaide (south australia),,300.0,300.0
au,sa,port adelaide (south australia),,5.0,5.0
au,wa,cue (western australia) (australia),,30.0,30.0
au,wa,perth (western australia),,420.0,420.0
au,yt,port macquarie (australia),,30.0,30.0
ca,ab,airdrie (canada),,863.7,8637.0


In [33]:
# Drop the null values
df1 = df.dropna(how="any")
print(df1.shape)

(480, 3)


In [35]:
# Show the total number of UFO sightings for each city, state, and country. 
# Limit the number of sightings to 20 or more by using the updated custom function.
# Show the number of UFOs for each city, state, and country. 
df = pd.pivot_table(converted_ufo_df, index=['country', 'state', 'city'], 
                    values='duration (seconds)', aggfunc=(custom_count)).round(1)


# Display the results.
print(df.shape)

df.head(50)

# Drop the null values


# Show the top 25 results


(480, 1)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,duration (seconds)
country,state,city,Unnamed: 3_level_1
ca,ab,calgary (canada),74.0
ca,ab,edmonton (canada),56.0
ca,bc,houston (canada),25.0
ca,bc,kelowna (canada),40.0
ca,bc,surrey (canada),28.0
ca,bc,terrace (canada),25.0
ca,bc,vancouver (canada),95.0
ca,bc,victoria (canada),73.0
ca,mb,winnipeg (canada),67.0
ca,ns,halifax (canada),27.0


In [38]:
# Rename the column to reflect the results. 
df = df.rename(columns={'duration (seconds)': "Num Sightings"})

# Sort the pivot table to show the highest number of UFO sightings by country, state, and city.
df.sort_values(by=['Num Sightings', 'country', 'state', 'city'], ascending=False).head(50)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Num Sightings
country,state,city,Unnamed: 3_level_1
us,wa,seattle,471.0
us,az,phoenix,434.0
us,nv,las vegas,352.0
us,ca,los angeles,347.0
us,ca,san diego,327.0
us,or,portland,313.0
us,tx,houston,289.0
us,il,chicago,256.0
us,az,tucson,237.0
us,fl,miami,222.0
