In [78]:
# import packages and the data
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ipywidgets import interact
from ipywidgets import widgets

ufos = pd.read_csv("ufo-scrubbed-geocoded-time-standardized-00.csv",
                  names = ["date", "city", "state", "country",
                          "shape", "duration_seconds", "duration",
                          "comment", "report_date", "latitude", "longitude"],
                  parse_dates = ["date", "report_date"])

In [79]:
ufos

Unnamed: 0,date,city,state,country,shape,duration_seconds,duration,comment,report_date,latitude,longitude
0,1949-10-10 20:30:00,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,2004-04-27,29.883056,-97.941111
1,1949-10-10 21:00:00,lackland afb,tx,,light,7200.0,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,2005-12-16,29.384210,-98.581082
2,1955-10-10 17:00:00,chester (uk/england),,gb,circle,20.0,20 seconds,Green/Orange circular disc over Chester&#44 En...,2008-01-21,53.200000,-2.916667
3,1956-10-10 21:00:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,2004-01-17,28.978333,-96.645833
4,1960-10-10 20:00:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,2004-01-22,21.418056,-157.803611
...,...,...,...,...,...,...,...,...,...,...,...
80327,2013-09-09 21:15:00,nashville,tn,us,light,600.0,10 minutes,Round from the distance/slowly changing colors...,2013-09-30,36.165833,-86.784444
80328,2013-09-09 22:00:00,boise,id,us,circle,1200.0,20 minutes,Boise&#44 ID&#44 spherical&#44 20 min&#44 10 r...,2013-09-30,43.613611,-116.202500
80329,2013-09-09 22:00:00,napa,ca,us,other,1200.0,hour,Napa UFO&#44,2013-09-30,38.297222,-122.284444
80330,2013-09-09 22:20:00,vienna,va,us,circle,5.0,5 seconds,Saw a five gold lit cicular craft moving fastl...,2013-09-30,38.901111,-77.265556


In [81]:
ufos = ufos.fillna({'country':'unknown', 'state':'unknown', 'shape':'unknown'})

In [82]:
ufos

Unnamed: 0,date,city,state,country,shape,duration_seconds,duration,comment,report_date,latitude,longitude
0,1949-10-10 20:30:00,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,2004-04-27,29.883056,-97.941111
1,1949-10-10 21:00:00,lackland afb,tx,unknown,light,7200.0,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,2005-12-16,29.384210,-98.581082
2,1955-10-10 17:00:00,chester (uk/england),unknown,gb,circle,20.0,20 seconds,Green/Orange circular disc over Chester&#44 En...,2008-01-21,53.200000,-2.916667
3,1956-10-10 21:00:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,2004-01-17,28.978333,-96.645833
4,1960-10-10 20:00:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,2004-01-22,21.418056,-157.803611
...,...,...,...,...,...,...,...,...,...,...,...
80327,2013-09-09 21:15:00,nashville,tn,us,light,600.0,10 minutes,Round from the distance/slowly changing colors...,2013-09-30,36.165833,-86.784444
80328,2013-09-09 22:00:00,boise,id,us,circle,1200.0,20 minutes,Boise&#44 ID&#44 spherical&#44 20 min&#44 10 r...,2013-09-30,43.613611,-116.202500
80329,2013-09-09 22:00:00,napa,ca,us,other,1200.0,hour,Napa UFO&#44,2013-09-30,38.297222,-122.284444
80330,2013-09-09 22:20:00,vienna,va,us,circle,5.0,5 seconds,Saw a five gold lit cicular craft moving fastl...,2013-09-30,38.901111,-77.265556


In [83]:
# There are too much data in the dataset, so I need to downsample it.
nSamples = 1000

In [84]:
np.random.seed(0)
randomIndicies = np.random.choice(range(len(ufos)-1), nSamples, replace=False) # replace=False is to not double count indicies

In [85]:
# randomly select 1000 samples from the dataset
new_ufos = ufos.loc[randomIndicies]

In [86]:
new_ufos

Unnamed: 0,date,city,state,country,shape,duration_seconds,duration,comment,report_date,latitude,longitude
59392,2001-07-24 10:00:00,ocean city,md,us,flash,1.0,1 second,flash of light&#33 possibly abduction&#33,2006-05-15,38.336389,-75.085278
44710,2001-05-28 18:45:00,waynesburg,oh,us,chevron,60.0,1 minute,UFO sighting of boomerang in Ohio during Dayli...,2011-10-10,40.667778,-81.257500
74346,2004-09-14 20:45:00,dallas,tx,us,light,2.0,2 seconds,travelling southbound on Highway 67&#44 I saw ...,2004-09-29,32.783333,-96.800000
17220,2013-12-11 18:25:00,powell,wy,us,chevron,10.0,~10 seconds,Bright flashes in north sky...,2013-12-12,44.753889,-108.756667
71553,1996-08-05 00:30:00,tel aviv (israel),unknown,unknown,sphere,7200.0,2 hours,500 witnesses + TV cameras + army presence in ...,2008-07-05,32.085300,34.781768
...,...,...,...,...,...,...,...,...,...,...,...
71284,2013-08-03 23:30:00,new richmond,wi,us,light,300.0,5 minutes,Amber colored light in New Richmond&#44 Wi.,2013-08-30,45.123056,-92.536389
15585,2003-11-07 00:00:00,livingston,mt,us,fireball,5.0,5 seconds,Round green sphere&#44 dropping beind the moun...,2003-11-11,45.662500,-110.560278
25568,2005-02-10 19:50:00,new york city,ny,us,light,20.0,20 seconds,out my window in nyc i saw two very distinct s...,2005-04-16,40.714167,-74.006389
41284,2009-05-10 21:29:00,milpitas,ca,us,sphere,15.0,15 sec,Ufo Milpitas,2009-05-12,37.428333,-121.905556


### Allow the user to change the x and y fields on a scatter plot from the UFO dataset

In [69]:
new_ufos.select_dtypes('number').columns

Index(['duration_seconds', 'latitude', 'longitude'], dtype='object')

In [101]:
# scatter plot with both categorical and numerical x and only numerical y.
@interact
def scatter_plot(x=list(new_ufos.columns), 
                 y=list(new_ufos.select_dtypes('number').columns)):
    
    plt.plot(x, y, '.', data=new_ufos)

interactive(children=(Dropdown(description='x', options=('date', 'city', 'state', 'country', 'shape', 'duratio…

I don't the user to be able to pick any two columns to plot, instead I will only allow users to pick certain columns. Because many combinations of columns will lead to meaningless plots, which could not bring insights for users and look strange.

In [102]:
# allow the user to pick their colormap.
@interact
def scatter_plot(x=list(new_ufos.columns), 
                 y=list(new_ufos.select_dtypes('number').columns)):
    plt.scatter(new_ufos[x], new_ufos[y], 
           c=np.log10(new_ufos['duration_seconds']), cmap='rainbow')

interactive(children=(Dropdown(description='x', options=('date', 'city', 'state', 'country', 'shape', 'duratio…

In [22]:
buildings = pd.read_csv('building_inventory.csv',
               na_values={'Square Footage': 0, 
                         'Year Acquired': 0,
                         'Year Constructed': 0, 
                         'Floors':0}) 

In [23]:
stats = buildings.groupby("Year Acquired")["Square Footage"].describe()

In [24]:
@ipywidgets.interact(style = plt.style.available)
def make_plot(style): # function!  its needs a colon!
    with plt.style.context(style):
        fig, ax = plt.subplots(1,1, figsize=(4,2))
        ax.plot(stats["max"], marker='.', linewidth=1.0, label="Max")
        ax.set_label('Year')
        ax.set_ylabel("Square Footage")
        ax.set_yscale("log")
        ax.legend()
    plt.show()

interactive(children=(Dropdown(description='style', options=('Solarize_Light2', '_classic_test_patch', 'bmh', …

### Build a second widget that displays binned, aggregate values

In [173]:
@interact
def make_histogram(columns=list(new_ufos.columns), 
                 number_of_bins=list(range(1,20))):
    if columns in ['duration_seconds', 'latitude', 'longitude','date','duration','report_date']:
        plt.clf()
        new_ufos.hist(column=columns, bins=number_of_bins)
    else:
        plt.clf()
        data = new_ufos.groupby(by=columns).count().take([0],axis=1)
        x = data.index.values
        y = data.values.reshape(data.values.shape[0])
        plt.bar(x=x,height=y,data=new_ufos)

interactive(children=(Dropdown(description='columns', options=('date', 'city', 'state', 'country', 'shape', 'd…

From the graph above, I could not agree that we should allow users to pick any columns to plot. That's because if we pick an attribute with many values, the graph will look messy, also meaningless to the users. In my opinion, only allowing users to pick certain columns is a better choice.