In [1]:
%matplotlib inline


In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime
import us

##  Import display 
from IPython.display import display


### ipywidget libraries
from ipywidgets import HBox, VBox, IntSlider, Play, jslink
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

### bqplot libraries
import bqplot
from bqplot import (
    Axis, ColorAxis, LinearScale, DateScale, DateColorScale, OrdinalScale,
    OrdinalColorScale, ColorScale, Scatter, Lines, Figure, Tooltip
)
from bqplot import (
    Figure, Map, Mercator, Orthographic, ColorScale, ColorAxis,
    AlbersUSA, topo_load, Tooltip
)

In [3]:
names = ["date", "city", "state", "country", "shape", "duration_seconds",
         "duration_reported", "description", "report_date", "latitude",
         "longitude","time"]

fn = "ufo-scrubbed-geocoded-time-standardized_new.csv"
ufo = pd.read_csv(fn, names = names, parse_dates = ["date", "report_date"])

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
ufo.head()

Unnamed: 0,date,city,state,country,shape,duration_seconds,duration_reported,description,report_date,latitude,longitude,time
0,10-10-1949 20:30,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,2004-04-27,29.8830556,-97.941111,20:30
1,10-10-1949 21:00,lackland afb,tx,,light,7200.0,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,2005-12-16,29.38421,-98.581082,21:00
2,10-10-1955 17:00,chester (uk/england),,gb,circle,20.0,20 seconds,Green/Orange circular disc over Chester&#44 En...,2008-01-21,53.2,-2.916667,17:00
3,10-10-1956 21:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,2004-01-17,28.9783333,-96.645833,21:00
4,10-10-1960 20:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,2004-01-22,21.4180556,-157.803611,20:00


In [5]:
abbr_to_fits = us.states.mapping('abbr', 'fips')


In [6]:
ufo["fips"] = ufo["state"].apply(lambda a: int(abbr_to_fits.get(str(a).upper(), -1)))

In [7]:
ufo

Unnamed: 0,date,city,state,country,shape,duration_seconds,duration_reported,description,report_date,latitude,longitude,time,fips
0,10-10-1949 20:30,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,2004-04-27,29.8830556,-97.941111,20:30,48
1,10-10-1949 21:00,lackland afb,tx,,light,7200.0,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,2005-12-16,29.38421,-98.581082,21:00,48
2,10-10-1955 17:00,chester (uk/england),,gb,circle,20.0,20 seconds,Green/Orange circular disc over Chester&#44 En...,2008-01-21,53.2,-2.916667,17:00,-1
3,10-10-1956 21:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,2004-01-17,28.9783333,-96.645833,21:00,48
4,10-10-1960 20:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,2004-01-22,21.4180556,-157.803611,20:00,15
5,10-10-1961 19:00,bristol,tn,us,sphere,300.0,5 minutes,My father is now 89 my brother 52 the girl wit...,2007-04-27,36.595,-82.188889,19:00,47
6,10-10-1965 21:00,penarth (uk/wales),,gb,circle,180.0,about 3 mins,penarth uk circle 3mins stayed 30ft above m...,2006-02-14,51.434722,-3.180000,21:00,-1
7,10-10-1965 23:45,norwalk,ct,us,disk,1200.0,20 minutes,A bright orange color changing to reddish colo...,2017-12-12,41.1175,-73.408333,23:45,9
8,10-10-1966 20:00,pell city,al,us,disk,180.0,3 minutes,Strobe Lighted disk shape object observed clos...,2009-03-19,33.5861111,-86.286111,20:00,1
9,10-10-1966 21:00,live oak,fl,us,disk,120.0,several minutes,Saucer zaps energy from powerline as my pregna...,2017-12-12,30.2947222,-82.984167,21:00,12


In [8]:

ufo["fips"].unique()

array([48, -1, 15, 47,  9,  1, 12,  6, 37, 36, 21, 26, 25, 20, 45, 53,  8,
       33, 55, 23, 13, 42, 17,  5, 29, 39, 18,  4, 27, 32, 31, 41, 19, 51,
       16, 35, 34, 54, 40, 44, 50, 22, 72,  2, 28, 49, 24, 30, 56, 46, 10,
       38, 11], dtype=int64)

In [9]:
ufo.date


0        10-10-1949 20:30
1        10-10-1949 21:00
2        10-10-1955 17:00
3        10-10-1956 21:00
4        10-10-1960 20:00
5        10-10-1961 19:00
6        10-10-1965 21:00
7        10-10-1965 23:45
8        10-10-1966 20:00
9        10-10-1966 21:00
10       10-10-1968 13:00
11       10-10-1968 19:00
12       10-10-1970 16:00
13       10-10-1970 19:00
14       10-10-1971 21:00
15       10-10-1972 19:00
16       10-10-1972 22:30
17       10-10-1973 19:00
18       10-10-1973 23:00
19       10-10-1974 19:30
20       10-10-1974 21:30
21       10-10-1974 23:00
22       10-10-1975 17:00
23       10-10-1976 20:30
24       10-10-1976 22:00
25       10-10-1977 12:00
26       10-10-1977 22:00
27       10-10-1978 02:00
28       10-10-1979 00:00
29       10-10-1979 22:00
               ...       
80302    09-09-2012 20:00
80303    09-09-2012 20:10
80304    09-09-2012 20:30
80305    09-09-2012 20:30
80306    09-09-2012 20:52
80307    09-09-2012 21:00
80308    09-09-2012 21:00
80309    09-

In [10]:
date= [y.split(' ') for y in ufo['date']]
years=[]
for item in date:
    if '/' in item[0]:
        day,month,year=item[0].split('/')
    else:
        day,month,year=item[0].split('-')
    years.append(year)
ufo['year']=years



In [11]:
ufo.tail()

Unnamed: 0,date,city,state,country,shape,duration_seconds,duration_reported,description,report_date,latitude,longitude,time,fips,year
80327,09-09-2013 21:15,nashville,tn,us,light,600.0,10 minutes,Round from the distance/slowly changing colors...,2013-09-30,36.1658,-86.784444,21:15,47,2013
80328,09-09-2013 22:00,boise,id,us,circle,1200.0,20 minutes,Boise&#44 ID&#44 spherical&#44 20 min&#44 10 r...,2013-09-30,43.6136,-116.2025,22:00,16,2013
80329,09-09-2013 22:00,napa,ca,us,other,1200.0,hour,Napa UFO&#44,2013-09-30,38.2972,-122.284444,22:00,6,2013
80330,09-09-2013 22:20,vienna,va,us,circle,5.0,5 seconds,Saw a five gold lit cicular craft moving fastl...,2013-09-30,38.9011,-77.265556,22:20,51,2013
80331,09-09-2013 23:00,edmond,ok,us,cigar,1020.0,17 minutes,2 witnesses 2 miles apart&#44 Red &amp; White...,2013-09-30,35.6528,-97.477778,23:00,40,2013


In [12]:
# Making the plots

In [13]:
#number of sightings per state
sightings_count = np.log10(ufo.groupby("fips")["duration_seconds"].count())

In [14]:
print(sightings_count)

fips
-1     3.972851
 1     2.839478
 2     2.549003
 4     3.429591
 5     2.823474
 6     3.984752
 8     3.177536
 9     2.985875
 10    2.262451
 11    1.995635
 12    3.623249
 13    3.129368
 15    2.547775
 16    2.743510
 17    3.422426
 18    3.141763
 19    2.849419
 20    2.814913
 21    2.960946
 22    2.776701
 23    2.801404
 24    2.959518
 25    3.132900
 26    3.316180
 27    3.033826
 28    2.618048
 29    3.197556
 30    2.707570
 31    2.607455
 32    2.956649
 33    2.728354
 34    3.179552
 35    2.911158
 36    3.507721
 37    3.271609
 38    2.139879
 39    3.384712
 40    2.884229
 41    3.265996
 42    3.411956
 44    2.462398
 45    3.031812
 46    2.292256
 47    3.076640
 48    3.565494
 49    2.870989
 50    2.487138
 51    3.143951
 53    3.630224
 54    2.686636
 55    3.124830
 56    2.311754
 72    1.518514
Name: duration_seconds, dtype: float64


In [15]:
ufo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 80332 entries, 0 to 80331
Data columns (total 14 columns):
date                 80332 non-null object
city                 80332 non-null object
state                74535 non-null object
country              70662 non-null object
shape                78400 non-null object
duration_seconds     80332 non-null float64
duration_reported    80332 non-null object
description          80317 non-null object
report_date          80332 non-null datetime64[ns]
latitude             80332 non-null object
longitude            80332 non-null float64
time                 80332 non-null object
fips                 80332 non-null int64
year                 80332 non-null object
dtypes: datetime64[ns](1), float64(2), int64(1), object(10)
memory usage: 8.6+ MB


In [16]:
#total time in sightings per state
ufo['duration_seconds'].astype(float)

ufo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 80332 entries, 0 to 80331
Data columns (total 14 columns):
date                 80332 non-null object
city                 80332 non-null object
state                74535 non-null object
country              70662 non-null object
shape                78400 non-null object
duration_seconds     80332 non-null float64
duration_reported    80332 non-null object
description          80317 non-null object
report_date          80332 non-null datetime64[ns]
latitude             80332 non-null object
longitude            80332 non-null float64
time                 80332 non-null object
fips                 80332 non-null int64
year                 80332 non-null object
dtypes: datetime64[ns](1), float64(2), int64(1), object(10)
memory usage: 8.6+ MB


In [17]:
sightings_time= np.log10 (ufo.groupby("fips")["duration_seconds"].sum())

In [18]:
sightings_time

fips
-1     8.503200
 1     5.983557
 2     6.175532
 4     7.203874
 5     7.827224
 6     7.578965
 8     6.658175
 9     7.102789
 10    5.201423
 11    5.060551
 12    7.753726
 13    7.030755
 15    6.832601
 16    5.729635
 17    6.365605
 18    6.632396
 19    5.826972
 20    5.995244
 21    6.584752
 22    6.836703
 23    6.288037
 24    5.880570
 25    6.250896
 26    6.905581
 27    6.183390
 28    6.561750
 29    6.256130
 30    6.039751
 31    5.634219
 32    6.422925
 33    6.059093
 34    6.903827
 35    6.625975
 36    6.972317
 37    6.379440
 38    5.183401
 39    6.582320
 40    7.058271
 41    6.293024
 42    7.012941
 44    5.720528
 45    6.074190
 46    5.708670
 47    6.283591
 48    6.940488
 49    6.546679
 50    5.519591
 51    7.137941
 53    7.762023
 54    6.481776
 55    6.436153
 56    5.576956
 72    4.463744
Name: duration_seconds, dtype: float64

In [19]:
from bqplot import (
    Figure, Map, ColorScale, ColorAxis,
    AlbersUSA,topo_load, Tooltip
)

from bqplot import pyplot as plt

In [20]:
map_tooltip=Tooltip(fields=["name","id","color", 'color'],labels=["state", 'id',"sightings_count"])
map_styles = {'scales': {'projection': bqplot.AlbersUSA(),'color': bqplot.ColorScale(scheme='spectral'
               ,reverse=True,domain=['color1', 'color'])},
            }
states_map = Map(map_data=bqplot.topo_load('map_data/USStatesMap.json'), 
                        **map_styles,
                         tooltip=map_tooltip,
                         interactions = {'click': 'select', 'hover': 'tooltip'},
                         selected_style={'opacity': 1.5, 'fill': 'blue', 'stroke': 'white'},
                          unselected_style={'opacity': 1.0}
                       )
map_fig = bqplot.Figure(marks=[states_map], title='MAP OF USA')
states_map.display_legend=True
states_map.enable_hover
map_fig

In [21]:
map_fig= plt.figure(title="MAP OF USA", min_width=1300, min_height=800)
map_tt=Tooltip(fields=['name','color'], labels=['state','sightings_count'])
states_map= plt.geo(map_data=topo_load('map_data/USStatesMap.json'),tooltip=map_tt)

In [22]:
states_map

Map(hovered_styles={'hovered_fill': 'Orange', 'hovered_stroke': None, 'hovered_stroke_width': 2.0}, interactions={'hover': 'tooltip'}, map_data={'type': 'Topology', 'objects': {'land': {'type': 'MultiPolygon', 'properties': {'counties': None}, 'arcs': [[[5830, 6026, 6034, 6073, 6164, 6179, 6023, 6043, 6231, 6416, 6410, 6564, 6243, 6565, 6546, 6475, 6542, 6784, 6741, 6811, 6739, 6870, 6965, 7087, 7246, -7086, 7247, 7255, 7256, 7257, 7305, 7566, 9399, 7668, 9397, -7836, 9398, 7838, 8050, 8017, 8018, 8019, 8020, 8021, 8047, 8023, 8252, 8247, 8302, 8493, 8575, -8491, 8576, 8671, 8807, 8877, 8984, 9074, 9116, 9138, 9129, 9073, 8960, 8874, 8961, 8956, 8957, 9090, 9130, 9181, -9123, 9182, 9125, 9177, 9255, 9278, 9279, 9280, 9311, 9306, 9307, 9308, 9309, 9318, 9340, 9347, 9342, 9343, 9344, 9345, 9349, 9338, 9326, 9317, 9284, 9315, 9286, 9266, 9241, 9239, 9214, 9200, 9170, 9113, 9114, 9350, 8936, 8837, 8937, 9039, 8977, 9038, 8979, 8884, 8980, 8886, 8777, -8768, 8778, 8770, 8761, 8755, 8642, 86

In [23]:
figure=map_fig

In [24]:
plt.figure

<function bqplot.pyplot.figure>

In [25]:
#dropdown interaction
def dropdown_callback(change):
    map_tooltip.labels=["state",change.new]
    x_ay.label=change.new
    if change.new =="sightings_count":
        states_map.color=fips_count.to_dict()
        lc.y=np.cumsum(get_state_data(state,min_year,max_year)[0])
    else:
        states_map.color=sightings_time.to_dict() 
        lc.y=np.cumsum(get_state_data(state,min_year,max_year)[1])

In [26]:
dropdown_list=widgets.Dropdown(options=["sightings_count","sightings_time"],value="sightings_count",description="color_option")

In [27]:
dropdown_list


In [28]:
def get_state_data(state,from_year,to_year):
    state_sighting=ufo["state"]==state
    state_ufo=ufo[state_sighting]
    total_sighting= [state_ufo["duration_seconds"][state_ufo["year"]==year].count() for year in np.arange(from_year,to_year+1)]
    total_duration= [state_ufo["duration_seconds"][state_ufo["year"]==year].sum() for year in np.arange(from_year,to_year+1)]
    return [total_sighting,total_duration]

In [29]:
np.cumsum(get_state_data("az",1947,1949)[0])

array([0, 0, 0], dtype=int32)

In [30]:
import math

In [31]:
def get_min_max_year(state):
    min_year=ufo[ufo["state"]==state]["date"].min().year
    max_year=ufo[ufo["state"]==state]["date"].max().year
    return [min_year,max_year]

In [32]:
def get_state_data(state_id,from_year,to_year):
    state_sighting=ufo["fips"]==state_id
    state_ufo=ufo[state_sighting]
    total_duration= [state_ufo["duration_seconds"][state_ufo["year"]==year].sum() for year in np.arange(from_year,to_year+1)]
    total_sighting= [state_ufo["duration_seconds"][state_ufo["year"]==year].count() for year in np.arange(from_year,to_year+1)]
    return [range(from_year, to_year +1 ),total_sighting,total_duration]

In [33]:
def get_color(from_year,to_year):
    
    total_sighting = np.log10([ufo["duration_seconds"][ufo["year"]==year].count() for year in np.arange(from_year,to_year+1)])
    total_sighting = np.nan_to_num(total_sighting)
    
    total_duration= [ufo["duration_seconds"][ufo["year"]==year].sum() for year in np.arange(from_year,to_year+1)]
    total_duration = np.nan_to_num(np.log10([1 if math.isnan(x) else x for x in total_duration ]))
    
    return [total_sighting,total_duration]


In [34]:
def upd_scat_plot(state_id,from_year,to_year,aggregate): 
    scat_data = get_state_data(get_state_id(),from_year,to_year)
    scat_plot.x = scat_data[0]
    scat_plot.y = scat_data[aggregate]
    if(aggregate == 1):
        tt_labels=["State","Id","Total Sightings"]
    else:
        tt_labels=["State","Id","Total Duration"]
    
    map_tooltip.labels =tt_labels

In [35]:
from IPython.display import display

In [36]:
style = {'description_width': 'initial'}

slider =  widgets.IntRangeSlider(
    value=[1920, 1960],
    min=ufo['year'].min(),
    max=ufo['year'].max(),
    step=4,
    description='Select Year Range',
    style=style,
    disabled=False,
    continuous_update=False,
    orientation='Horizontal',
    readout=True,
)

display(slider)

ddl = widgets.Dropdown(
    options={'Sight Counts': 1, 'Total Duration': 2 },
    value=1,
    description='Aggregate By:',
)

ddl2 = widgets.Dropdown(
    options={'Normalized Data': 1, 'Normal Data': 2 },
    value=2,
    description='Select Data:',
)


togg_norm = widgets.ToggleButtons(options=['Normalized Data','Data'],
                                  description='Select Data format:',disabled=False,
                                  button_style='', # 'success', 'info', 'warning', 'danger' or ''
                                  tooltips=['Normalized data ', 'Normal Data']
                                 )
#togg_norm.observe(upd_on_data_sel)
ddl2

In [37]:
default_state = 48 ## 
aggregate = ddl.value

x = LinearScale()
y = LinearScale()

col_sc = ColorScale()

axis_x = Axis( scale=x, label='Year Range')
axis_y = Axis( scale=y,  label='Total Sightings', orientation='vertical')

from_year, to_year  = slider.value[0],slider.value[1]
scat_data = get_state_data(default_state,from_year,to_year)


scat_plot = Lines(x=scat_data[0] , y=scat_data[aggregate], 
                scales={'x': x, 'y': y},                
                stroke='white',
                colors = ['orange'],
                    labels=['YEAR', 'Range' ]
                    
                   )   
plot_scat = Figure(axes=[axis_x,axis_y], marks=[scat_plot])

In [38]:
def upd_plot(self, target): 
    id = target['data']['id']
    upd_scat_plot(id, slider.value[0], slider.value[1],ddl.value)
    
def get_state_id():  
    if (len(states_map.selected) == 0 ): 
        state_id = default_state
    elif(len(states_map.selected) > 1):
        state_id = states_map.selected.pop()
    else:
        state_id = states_map.selected
    return state_id 

def upd_map(change): 
    
    if(change['new'] == 1 ):
        states_map.color=fips_count.to_dict()
    else:
        states_map.color=tot_time.to_dict()
                
def upd_axes(change):
    if(change['new'] == 1 ):
        scat_plot.labels='asdfad'
    else:
        states_map.color=tot_time.to_dict()
        
def upd_scat_ddl(change):
    if(change['new'] == 1):
        axis_y.label = "Total Sightings"
    else: 
        axis_y.label = "Total Duration"
    
    upd_scat_plot(get_state_id(), slider.value[0], slider.value[1], change['new']) 

def upd_on_data_sel(change):
    print(change['new'])
    #print(ddl2.value)
    upd_scat_plot(states_map.selected, slider.value[0], slider.value[1],ddl.value)

In [39]:
states_map.on_element_click(upd_plot)
states_map.selected=[]
ddl.observe(upd_map , names='value')
ddl.observe(upd_scat_ddl , names='value')
ddl2.observe(upd_on_data_sel , names='value')

In [40]:
from ipywidgets import ToggleButtons, VBox, HTML, Dropdown, HBox

In [65]:
tt_labels=["State","Id","Total Sightings"]
map_tooltip.labels = tt_labels
display(slider)
states_map.selected=[]
H2 = HBox(children = [ddl,ddl2])
H1 = HBox(children = [plot_scat,map_fig])
V1 = VBox(children=[H2, H1])
V1

In [66]:
fips_count = np.log10(ufo.groupby("fips")["duration_seconds"].count())

tot_time = np.log10(ufo.groupby("fips")["duration_seconds"].sum())
ufo.head()

Unnamed: 0,date,city,state,country,shape,duration_seconds,duration_reported,description,report_date,latitude,longitude,time,fips,year
0,10-10-1949 20:30,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,2004-04-27,29.8830556,-97.941111,20:30,48,1949
1,10-10-1949 21:00,lackland afb,tx,,light,7200.0,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,2005-12-16,29.38421,-98.581082,21:00,48,1949
2,10-10-1955 17:00,chester (uk/england),,gb,circle,20.0,20 seconds,Green/Orange circular disc over Chester&#44 En...,2008-01-21,53.2,-2.916667,17:00,-1,1955
3,10-10-1956 21:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,2004-01-17,28.9783333,-96.645833,21:00,48,1956
4,10-10-1960 20:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,2004-01-22,21.4180556,-157.803611,20:00,15,1960


In [74]:
population = pd.read_csv("population_us_n.csv")
population.set_index("Year",inplace=True)

In [75]:
population

Unnamed: 0_level_0,AL,AK,AZ,AR,CA,CO,CT,DE,FL,GA,...,TN,TX,UT,VT,VA,WA,WV,WI,WY,DC
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1900,1830000,0,124000,1314000,1490000,543000,910000,185000,530000,2220000,...,2023000,3055000,277000,344000,1858000,523000,959000,2072000,93000,278000
1901,1907000,0,131000,1341000,1550000,581000,931000,187000,544000,2263000,...,2041000,3132000,284000,347000,1887000,583000,972000,2109000,100000,285000
1902,1935000,0,138000,1360000,1623000,621000,952000,188000,565000,2305000,...,2060000,3210000,292000,349000,1894000,651000,1000000,2141000,105000,290000
1903,1957000,0,144000,1384000,1702000,652000,972000,190000,587000,2346000,...,2082000,3291000,299000,350000,1890000,719000,1037000,2171000,108000,295000
1904,1978000,0,151000,1419000,1792000,659000,987000,192000,599000,2387000,...,2086000,3374000,308000,353000,1889000,782000,1064000,2202000,111000,302000
1905,2012000,0,158000,1447000,1893000,680000,1010000,194000,615000,2427000,...,2111000,3459000,316000,354000,1917000,842000,1094000,2231000,114000,308000
1906,2045000,0,167000,1465000,1976000,707000,1033000,196000,628000,2466000,...,2136000,3546000,327000,355000,1942000,904000,1122000,2250000,118000,313000
1907,2058000,0,176000,1484000,2054000,733000,1057000,197000,645000,2505000,...,2136000,3636000,339000,355000,1952000,967000,1149000,2268000,125000,317000
1908,2070000,0,186000,1513000,2161000,757000,1077000,199000,684000,2543000,...,2164000,3727000,351000,354000,1994000,1028000,1174000,2292000,133000,321000
1909,2108000,0,196000,1545000,2282000,775000,1097000,201000,724000,2580000,...,2177000,3821000,363000,356000,2038000,1091000,1203000,2317000,141000,327000


In [76]:

abbr_to_fits = us.states.mapping('abbr', 'fips')

In [77]:
ufo["fips"] = ufo["state"].apply(lambda a: int(abbr_to_fits.get(str(a).upper(), -1))) 

In [78]:
population.columns=[int(abbr_to_fits.get(key,-1)) for key in population.columns]

In [79]:

population.head()

Unnamed: 0_level_0,1,2,4,5,6,8,9,10,12,13,...,47,48,49,50,51,53,54,55,56,11
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1900,1830000,0,124000,1314000,1490000,543000,910000,185000,530000,2220000,...,2023000,3055000,277000,344000,1858000,523000,959000,2072000,93000,278000
1901,1907000,0,131000,1341000,1550000,581000,931000,187000,544000,2263000,...,2041000,3132000,284000,347000,1887000,583000,972000,2109000,100000,285000
1902,1935000,0,138000,1360000,1623000,621000,952000,188000,565000,2305000,...,2060000,3210000,292000,349000,1894000,651000,1000000,2141000,105000,290000
1903,1957000,0,144000,1384000,1702000,652000,972000,190000,587000,2346000,...,2082000,3291000,299000,350000,1890000,719000,1037000,2171000,108000,295000
1904,1978000,0,151000,1419000,1792000,659000,987000,192000,599000,2387000,...,2086000,3374000,308000,353000,1889000,782000,1064000,2202000,111000,302000


In [81]:
sighting_over_population=pd.DataFrame([get_state_data(state_id,1900,2014)[1]/pd.to_numeric(population[state_id]) for state_id in population.columns]).transpose()

            

In [82]:
sighting_over_population.min().min()

0.0