In [None]:
%%html
<style>
    /* Let the element with id "notebook-container" span the full page width. */
    #notebook-container { width: 100%; }
</style>

In [None]:
import altair as alt
import pandas as pd

from altair import datum
from vega_datasets import data
from geopy.geocoders import Nominatim

# alt.renderers.enable('notebook')

In [None]:
# Select Altair's 'default' data transformer with max_rows=None so that no
# row limit is applied to the DataFrames passed to the charts below.
alt.data_transformers.enable('default', max_rows=None) 

In [None]:
# Load the UFO sightings and tidy them in one chain:
#   * strip the trailing space from the 'longitude ' column name,
#   * turn the leftover '&#39' / '&#44' entity fragments back into ' and ,
#   * tag every row with its record type.
df = (
    pd.read_csv('scrubbed_UFO.csv')
    .rename(columns={'longitude ': 'longitude'})
    .replace(to_replace='&#39', value='\'', regex=True)
    .replace(to_replace='&#44', value=',', regex=True)
    .assign(type="UFO")
)

df.head()

In [None]:
# Load the meteorite landings and tag every row with its record type.
df_meteorite = pd.read_csv('meteorite-landings.csv').assign(type="Meteorite")

df_meteorite.head()

Questions to be answered: <br>
- I would like to match the UFO data against a meteorite-landings dataset to check whether some of the spotted UFOs may simply have been meteorites, and whether the concentration of UFO sightings coincides with the concentration of observed meteorites on the global map.
- Are there places on Earth where more UFOs are spotted than elsewhere? <br>



In [None]:
# Figure: world map of UFO sightings and meteorite landings for one year,
# chosen with a slider, plus a hand-built colour legend on the right.

# Range slider (1906-2014, step 1, labelled 'Year') bound to a single
# selection whose 'selected_year' value starts at 1995.
year_slider = alt.binding_range(min=1906, max=2014, step=1, name='Year')
year_selector = alt.selection_single(name="YearSelector", fields=['selected_year'],
                                bind=year_slider, init={'selected_year': 1995})

# Meteorite rows for the selected year: the <= and >= filters together keep
# only rows whose `year` equals the slider value.
base_meteorite = alt.Chart(df_meteorite
).transform_filter(datum.year <= year_selector.selected_year).transform_filter(datum.year >= year_selector.selected_year)

# UFO rows for the selected year: `year` is first derived from `datetime`,
# then the same pair of filters is applied.
base_ufo = alt.Chart(df
).transform_calculate(year = "year(datum.datetime)"
).transform_filter(datum.year <= year_selector.selected_year
).transform_filter(datum.year >= year_selector.selected_year
)

# Country outlines for the background map.
source = alt.topo_feature(data.world_110m.url, 'countries')
# states_source = alt.topo_feature(data.us_10m.url, feature='states')

# Manual legend: one coloured rectangle per record type, with the built-in
# legend switched off.
# NOTE(review): the colour range is presumably meant to line up with the
# hard-coded point colours below (Meteorite '#1696d2', UFO '#db2b27'); this
# relies on the scale's default domain ordering - confirm.
legend_source = ['UFO', 'Meteorite']
df_legend = pd.DataFrame(legend_source, columns=['type'])

legend = alt.Chart(df_legend).mark_rect().encode(
y=alt.Y('type:N', axis=alt.Axis(title=None, labelFont='Arial', labelFontSize=12, labelColor='#000000')),
color=alt.Color('type:N', scale=alt.Scale(range=['#1696d2', '#db2b27']), legend=None)
)

# Light-grey world map that also carries the figure title and subtitle.
base_world = alt.Chart(source, title=alt.TitleParams(text='Locations of Observed UFOs and Meteorites on Global Map per Year', 
                                                     subtitle='Each point mark a place where UFO or meteorite were spotted for selected year', 
                                                     font='Arial Bold', fontSize=24, anchor='start', color='#000000',
                                                    subtitleFontSize=14, subtitleColor='#000000', subtitleFont='Arial')
).mark_geoshape(
    fill='#d2d2d2',
    stroke='white'
)

earth = base_world.project('naturalEarth1')

# states = alt.Chart(states_source).mark_geoshape(
#     fill='#666666',
#     stroke='white'
# ).project('albersUsa')

# One semi-transparent blue dot per meteorite row of the selected year.
points_meteorite = base_meteorite.mark_circle(
    size=10,
).encode(
    latitude='reclat:Q',
    longitude='reclong:Q',
    color=alt.value('#1696d2'),
#     tooltip=['type:N', 'year:N', 'name:N', 'mass:N'],
    opacity=alt.value(0.5)
)

# One semi-transparent red dot per UFO row of the selected year.  The slider
# selection is attached to this layer.
points_ufo = base_ufo.mark_circle(
    size=10,
).encode(
    latitude='latitude:Q',
    longitude='longitude:Q',
    color=alt.value('#db2b27'),
#     tooltip=['type:N', 'shape:N', 'city:N', 'duration (hours/min):N', 'datetime:N'],
    opacity=alt.value(0.5)
).add_selection(year_selector)

# Large grey text showing mean(year) of the filtered UFO rows; since every
# remaining row has the selected year, this displays that year.
year_back_ufo = base_ufo.mark_text(fontSize=100, fill='#d2d2d2', align='right', baseline='top', dx=-175, dy=80).encode(
text=alt.Text('mean(year):N'),
)

# Static "Source: ..." caption at fixed pixel positions.  transform_sample(1)
# reduces the data to a single row, presumably so the text is drawn once.
# NOTE(review): these layers are built on the year-filtered `base_ufo`, so
# the caption may disappear for years without UFO rows - confirm.
source_text1 = base_ufo.mark_text(text='Source:', fontWeight='bold', font='Arial', fontSize=11, color='#000000').encode(
x=alt.value(0), y=alt.value(420)).transform_sample(1)

source_text2 = base_ufo.mark_text(text='National UFO Reporting Center (NUFORC) and NASA', font='Arial', fontSize=11, color='#000000').encode(
x=alt.value(156), y=alt.value(420)).transform_sample(1)

# Same large year text, computed from the filtered meteorite rows.
year_back_meteorite = base_meteorite.mark_text(fontSize=100, fill='#d2d2d2', align='right', baseline='top', dx=-175, dy=80).encode(
text=alt.Text('mean(year):N'),
)

# Layer everything on the map (800x400), place the legend to its right and
# hide the view border.
((earth+year_back_ufo+year_back_meteorite+points_ufo+points_meteorite+source_text1+source_text2).properties(width=800, height=400) | legend
).configure_view(stroke='transparent') 
#| (states+points_ufo).properties(width=600, height=400)

Question to be answered: <br>
Does the number of UFO sightings increase over time? This is especially interesting for the last 20–30 years, with their widespread communication technologies. <br>

Here I plot a simple bar chart to answer the question. In addition, I've added a second chart that shows how many observations happen in each month and on each day of the month. <br>

For interactivity, you can select one or more years in the bar chart, and the right-hand chart will be updated for the selected years.

In [None]:
# Figure: number of UFO sightings per year (left) next to a month x day
# heatmap of sightings for the years selected in the bar chart (right).

# Base chart for the left-hand panel; it carries the figure title and is
# reused by the bar layer and by the caption / note text layers.
base_ufo = alt.Chart(
    df,
    title=alt.TitleParams(
        text='Total Amount of Spotted UFOs over Course of Time',
        subtitle='Together with distribution of UFOs observation within a year timeline by month and day',
        font='Arial Bold', fontSize=24, anchor='start', color='#000000',
        subtitleFontSize=14, subtitleColor='#000000', subtitleFont='Arial', dx=70,
    ),
)

# Multi-selection on the x encoding (the year); an empty selection matches
# all rows.
selector = alt.selection_multi(encodings=['x'], empty='all')

# Sightings per year on a base-2 log y-axis.  The explicit tick list starts
# at 1: a log scale has no valid position for 0, so 0 must not be listed.
bars = base_ufo.mark_bar().encode(
    y=alt.Y(
        'count()',
        scale=alt.Scale(type='log', base=2, domain=[0.9, 8001]),
        axis=alt.Axis(
            values=[1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 3000, 5000, 7000],
            titleAngle=0, titleAnchor='start', titleY=-7, title='Total amount',
            titleFont='Arial', titleFontStyle='Italic', titleFontWeight='normal',
            titleFontSize=12, titleColor='#000000',
            labelFont='Arial', labelFontSize=12, labelColor='#000000',
        ),
    ),
    x=alt.X(
        'year(datetime):T',
        axis=alt.Axis(
            titleAnchor='middle', title='Year',
            titleFont='Arial', titleFontStyle='Italic', titleFontWeight='normal',
            titleFontSize=12, titleColor='#000000',
            labelFont='Arial', labelFontSize=12, labelColor='#000000',
            titleAlign='center', grid=False,
        ),
    ),
    # Selected years are blue, the others grey.
    color=alt.condition(selector, alt.value('#1696d2'), alt.value('#d2d2d2')),
).add_selection(
    selector
).properties(width=600, height=400)

# Heatmap of sighting counts by month (rows) and day of month (columns),
# restricted to the years selected in `bars`.
ticks = alt.Chart(df).mark_rect().encode(
    y=alt.Y('month(datetime):N',
            axis=alt.Axis(title=None, labelFont='Arial', labelFontSize=12, labelColor='#000000')),
    x=alt.X('date(datetime):O',
            axis=alt.Axis(title=None, labelAngle=0, labelAlign='center',
                          labelFont='Arial', labelFontSize=12, labelColor='#000000')),
    color=alt.Color('count()', title='Total amount', scale=alt.Scale(range=['#cfe8f3', '#062635'])),
).transform_filter(
    selector
).properties(width=600, height=400)

# Fully transparent copy of the heatmap for the hard-coded year 1965, layered
# underneath `ticks`.
# NOTE(review): presumably there to keep the month/day axes complete when the
# selection leaves gaps - confirm.
invisible = alt.Chart(df).mark_rect().encode(
    y=alt.Y('month(datetime):N',
            axis=alt.Axis(title=None, labelFont='Arial', labelFontSize=12, labelColor='#000000')),
    x=alt.X('date(datetime):O',
            axis=alt.Axis(title=None, labelAngle=0, labelAlign='center',
                          labelFont='Arial', labelFontSize=12, labelColor='#000000')),
    color=alt.Color('count()', legend=None),
    opacity=alt.value(0),
).transform_calculate(
    year="year(datum.datetime)"
).transform_filter(
    datum.year == 1965
).properties(width=600, height=400)

# Static caption and usage note at fixed pixel positions below the bar chart.
# transform_sample(1) reduces the data to one row so each text is drawn once.
source_text1 = base_ufo.mark_text(
    text='Source:', fontWeight='bold', font='Arial', fontSize=11, color='#000000'
).encode(x=alt.value(0), y=alt.value(440)).transform_sample(1)

source_text2 = base_ufo.mark_text(
    text='National UFO Reporting Center (NUFORC) and NASA', font='Arial', fontSize=11, color='#000000'
).encode(x=alt.value(156), y=alt.value(440)).transform_sample(1)

note_text1 = base_ufo.mark_text(
    text='Notes:', fontWeight='bold', font='Arial', fontSize=11, color='#000000'
).encode(x=alt.value(-3), y=alt.value(453)).transform_sample(1)

note_text2 = base_ufo.mark_text(
    text='Single click - select a year. Shift + click - select multiple years. Double click - clear selection.',
    font='Arial', fontSize=11, color='#000000'
).encode(x=alt.value(240), y=alt.value(453)).transform_sample(1)

# Left: bars plus caption/notes.  Right: heatmap over its invisible backdrop.
(bars+source_text1+source_text2+note_text1+note_text2 | (invisible+ticks)).configure_view(stroke='transparent')

Question to be answered: <br>
Are the different types of spotted UFOs spread equally, or do they only appear in certain locations?<br>

I've made a similar-looking map and added bar charts on its edges to indicate the distribution across longitude and latitude. To allow some filtering, I have added an interactive legend, built as a heatmap, that shows the total count for each UFO type.

In [None]:
# Figure: UFO sightings on a world map with marginal latitude / longitude
# histograms, filterable by UFO shape through a clickable heatmap legend.

base_ufo = alt.Chart(df)

# Country outlines for the background map.
source = alt.topo_feature(data.world_110m.url, 'countries')

# Single selection keyed on the `shape` field, plus the count-based colour
# (built-in legend hidden) used for selected legend rows.
shape_selector = alt.selection_single(fields=['shape'])
leg_color = alt.Color('count()', legend=None)

base_world = alt.Chart(source).mark_geoshape(fill='#666666', stroke='white')
earth = base_world.project('naturalEarth1')

# One dot per sighting, restricted to the selected shape.
points = (
    base_ufo
    .mark_circle(size=10)
    .encode(
        latitude='latitude:Q',
        longitude='longitude:Q',
        tooltip=['shape:N', 'city:N', 'latitude:Q', 'longitude:Q'],
    )
    .properties(width=1200, height=600)
    .transform_filter(shape_selector)
)

# Sighting counts along latitude (shown to the right of the map).
lat_ufo = (
    base_ufo
    .mark_bar()
    .encode(y='latitude:Q', x='count()')
    .properties(width=300, height=600)
    .transform_filter(shape_selector)
)

# Sighting counts along longitude (shown above the map).
lon_ufo = (
    base_ufo
    .mark_bar()
    .encode(y='count()', x='longitude:Q')
    .properties(width=1200, height=300)
    .transform_filter(shape_selector)
)

# Clickable legend: one rectangle per shape, coloured by count when selected
# and grey otherwise.
leg = (
    base_ufo
    .mark_rect()
    .encode(
        y='shape:N',
        color=alt.condition(shape_selector, leg_color, alt.value('gray')),
    )
    .properties(width=100, height=300)
    .add_selection(shape_selector)
)

# Count labels drawn on top of the legend rectangles.
leg_text = (
    base_ufo
    .mark_text()
    .encode(y='shape:N', text='count()')
    .properties(width=100, height=300)
)

# Top row: longitude histogram and legend.  Bottom row: map and latitude
# histogram.
(lon_ufo | (leg + leg_text)) & ((earth + points) | lat_ufo)

Question to be answered: <br>
For how long were UFOs observed per country in the dataset?

I was going to represent each country as a bubble on the world map, sized by the overall sighting duration for that country. Unfortunately, I've discovered that the country name is missing for most records in the dataset, so I will first need to enrich it using some other services. For now, Canada and the UK serve as examples of how it should look.

In [None]:
# Figure: one bubble per country, placed at the mean sighting coordinates and
# sized by the summed 'duration (seconds)' of that country's sightings.

base_ufo = alt.Chart(df)

# Country outlines for the background map.
source = alt.topo_feature(data.world_110m.url, 'countries')

base_world = alt.Chart(source).mark_geoshape(fill='#666666', stroke='white')
earth = base_world.project('naturalEarth1')

points = (
    base_ufo
    .mark_circle(size=10)
    .encode(
        latitude='latitude:Q',
        longitude='longitude:Q',
        size=alt.Size('duration:Q'),
        tooltip=['country:N'],
    )
    # Collapse the rows to one record per country before encoding.
    .transform_aggregate(
        latitude='mean(latitude)',
        longitude='mean(longitude)',
        duration='sum(duration (seconds))',
        groupby=['country'],
    )
)

(earth + points).properties(width=1200, height=600)

# Drafts

Question to be answered: <br>
Are there places on Earth where more UFOs are spotted than elsewhere? <br>

Here I plot one point per record. This allows us to see how encounters are spread across the world map. Points are positioned by their latitude and longitude values; hence, if those values are empty, the points are wrongly placed in the top-left corner. In the end, the density of points indicates places with larger numbers of contacts. <br>

For interactivity, I've added a bar chart with an interval selection to limit the number of dots displayed.

In [None]:
# Draft figure: every UFO sighting as a red dot on a world map, filtered by
# an interval brush drawn on the sightings-per-year bar chart underneath.

base_ufo = alt.Chart(df)

# Country outlines for the background map.
source = alt.topo_feature(data.world_110m.url, 'countries')

# Interval selection along x; an empty brush matches all rows.
brush = alt.selection_interval(encodings=['x'], empty='all')

base_world = alt.Chart(source).mark_geoshape(fill='#666666', stroke='white')
earth = base_world.project('naturalEarth1')

# One dot per sighting inside the brushed range.
points = (
    base_ufo
    .mark_circle(size=10, color='red')
    .encode(
        latitude='latitude:Q',
        longitude='longitude:Q',
        tooltip=['shape:N', 'city:N', 'duration (hours/min):N', 'date posted:N'],
    )
    .transform_filter(brush)
)

# Faded bars: sightings per year for the whole dataset.
area_ufo = base_ufo.mark_bar().encode(
    x=alt.X('year(datetime):N'),
    y=alt.Y('count()'),
    opacity=alt.value(0.3),
)

# The faded layer owns the brush; the opaque layer redraws only the brushed
# bars on top of it.
area_ufo_notselected = area_ufo.add_selection(brush)
area_ufo_selected = (
    area_ufo
    .mark_bar()
    .encode(opacity=alt.value(1))
    .transform_filter(brush)
)

(
    (earth + points).properties(width=1200, height=600)
    & (area_ufo_notselected + area_ufo_selected).properties(width=1200, height=100)
)