In [1]:
# Listen to
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
population = pd.read_csv('ACT_population_born_overseas.csv')
from miditime.miditime import MIDITime
import random
import sys
import mido
from mido import Message, MidiFile, MidiTrack, MAX_PITCHWHEEL, MetaMessage
import numpy as np
import math
from music21 import midi
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf

How many years are there in the dataset?

In [2]:
import pandas as pd
from sodapy import Socrata

# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
client = Socrata("www.data.act.gov.au", None)

# Example authenticated client (needed for non-public datasets):
# client = Socrata(www.data.act.gov.au,
#                  MyAppToken,
#                  userame="user@example.com",
#                  password="AFakePassword")

# First 2000 results, returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("i5w3-megw", limit=2000)

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)



In [3]:
results_df.head()


Unnamed: 0,name,datetime,aqi_co,aqi_no2,aqi_o3_1hr,aqi_o3_4hr,aqi_pm10,aqi_pm2_5,aqi_site,time
0,Civic,2020-04-15T00:00:00.000,0,0,23,28,36,47,47,21
1,Florey,2020-04-15T00:00:00.000,5,9,30,36,39,88,88,21
2,Monash,2020-04-15T00:00:00.000,6,4,29,36,44,62,62,21
3,Monash,2020-04-14T00:00:00.000,6,11,23,27,33,62,62,23
4,Civic,2020-04-14T00:00:00.000,0,0,18,20,24,39,39,23


Let's examine the days. We see that we have daily data from 2018

In [4]:
results_df['datetime']

0       2020-04-15T00:00:00.000
1       2020-04-15T00:00:00.000
2       2020-04-15T00:00:00.000
3       2020-04-14T00:00:00.000
4       2020-04-14T00:00:00.000
                 ...           
1995    2018-06-11T00:00:00.000
1996    2018-06-10T00:00:00.000
1997    2018-06-10T00:00:00.000
1998    2018-06-10T00:00:00.000
1999    2018-06-09T00:00:00.000
Name: datetime, Length: 2000, dtype: object

In [5]:
results_df['date'] = list(map(lambda d: str(d.split('T')[0]), results_df['datetime']))

In [6]:
results_df['yearmonth'] = list(map(lambda d: str(d.split('-')[0]) + '-' + str(d.split('-')[1]), results_df['date']))

Let's see the data types. We see that we will have to convert datetime to datetime format.

In [7]:
results_df.dtypes

name          object
datetime      object
aqi_co        object
aqi_no2       object
aqi_o3_1hr    object
aqi_o3_4hr    object
aqi_pm10      object
aqi_pm2_5     object
aqi_site      object
time          object
date          object
yearmonth     object
dtype: object

In [8]:
results_df[results_df['aqi_co'].isnull()].count()

name          714
datetime      714
aqi_co          0
aqi_no2       110
aqi_o3_1hr    712
aqi_o3_4hr    698
aqi_pm10      660
aqi_pm2_5     656
aqi_site      714
time          714
date          714
yearmonth     714
dtype: int64

We see that about 40% of the values are null, so we drop them

In [9]:
air = results_df.dropna()

In [10]:
air

Unnamed: 0,name,datetime,aqi_co,aqi_no2,aqi_o3_1hr,aqi_o3_4hr,aqi_pm10,aqi_pm2_5,aqi_site,time,date,yearmonth
0,Civic,2020-04-15T00:00:00.000,0,0,23,28,36,47,47,21,2020-04-15,2020-04
1,Florey,2020-04-15T00:00:00.000,5,9,30,36,39,88,88,21,2020-04-15,2020-04
2,Monash,2020-04-15T00:00:00.000,6,4,29,36,44,62,62,21,2020-04-15,2020-04
3,Monash,2020-04-14T00:00:00.000,6,11,23,27,33,62,62,23,2020-04-14,2020-04
4,Civic,2020-04-14T00:00:00.000,0,0,18,20,24,39,39,23,2020-04-14,2020-04
...,...,...,...,...,...,...,...,...,...,...,...,...
1993,Monash,2018-06-11T00:00:00.000,10,16,31,38,24,54,54,24,2018-06-11,2018-06
1994,Florey,2018-06-11T00:00:00.000,10,18,30,36,37,71,71,24,2018-06-11,2018-06
1997,Florey,2018-06-10T00:00:00.000,14,12,28,34,45,77,77,24,2018-06-10,2018-06
1998,Monash,2018-06-10T00:00:00.000,12,14,29,35,26,60,60,24,2018-06-10,2018-06


We have to convert the dates to sequential integers

In [11]:

air.aqi_co = air.aqi_co.astype(int) 
air.aqi_no2 = air.aqi_no2.astype(int) 
air.aqi_o3_1hr = air.aqi_o3_1hr.astype(int) 
air.aqi_o3_4hr = air.aqi_o3_4hr.astype(int) 
air.aqi_pm10 = air.aqi_pm10.astype(int) 
air.aqi_pm2_5 = air.aqi_pm2_5.astype(int)
air.aqi_pm2_5 = air.aqi_pm2_5.astype(int)
air.aqi_site = air.aqi_site.astype(int)

We average the readings per year month

In [12]:
air_monthly_mean = air.groupby(by='yearmonth').mean()
air_monthly_mean.reset_index(inplace=True)

In [13]:
air_monthly_mean

Unnamed: 0,yearmonth,aqi_co,aqi_no2,aqi_o3_1hr,aqi_o3_4hr,aqi_pm10,aqi_pm2_5,aqi_site
0,2018-06,8.55814,13.906977,28.023256,33.883721,26.162791,49.372093,56.488372
1,2018-07,7.916667,15.1,29.983333,36.216667,31.816667,56.116667,61.95
2,2018-08,5.532258,14.758065,32.241935,39.403226,25.193548,38.854839,47.387097
3,2018-09,4.216667,14.933333,32.616667,40.0,23.383333,30.566667,41.266667
4,2018-10,2.387097,9.596774,37.354839,45.16129,22.709677,20.548387,46.112903
5,2018-11,1.745763,8.220339,35.711864,43.322034,29.118644,24.474576,50.0
6,2018-12,2.442308,9.076923,39.769231,48.0,46.634615,35.980769,69.057692
7,2019-01,3.327586,8.62069,53.034483,63.068966,42.741379,37.206897,66.810345
8,2019-02,1.821429,7.267857,37.196429,44.785714,58.607143,33.660714,72.375
9,2019-03,2.048387,9.564516,34.403226,41.403226,31.709677,34.677419,48.080645


In [14]:
fig = plt.figure()
%matplotlib inline
axes = fig.add_axes([0.1,0.1,0.8,0.8])
axes.plot(air_monthly_mean['yearmonth'], air_monthly_mean['aqi_co'])


[<matplotlib.lines.Line2D at 0x1c299ab750>]

In [15]:
init_notebook_mode(connected=True)
cf.go_offline()

In [16]:
air_monthly_mean.iplot(kind='line', x = 'yearmonth', y = air_monthly_mean.columns.names[1:7], xaxis_type='category')

In [61]:
air_monthly_mean = air_monthly_mean[['aqi_co','aqi_no2','aqi_o3_4hr','aqi_pm10','aqi_pm2_5']]

In [65]:
air_monthly_mean

Unnamed: 0,aqi_co,aqi_no2,aqi_o3_4hr,aqi_pm10,aqi_pm2_5
0,8.55814,13.906977,33.883721,26.162791,49.372093
1,7.916667,15.1,36.216667,31.816667,56.116667
2,5.532258,14.758065,39.403226,25.193548,38.854839
3,4.216667,14.933333,40.0,23.383333,30.566667
4,2.387097,9.596774,45.16129,22.709677,20.548387
5,1.745763,8.220339,43.322034,29.118644,24.474576
6,2.442308,9.076923,48.0,46.634615,35.980769
7,3.327586,8.62069,63.068966,42.741379,37.206897
8,1.821429,7.267857,44.785714,58.607143,33.660714
9,2.048387,9.564516,41.403226,31.709677,34.677419


We are going to assign a note from the pentatonic major scale

In reality we have 5 variables, because we just have a 1 hour and 4 hour measurement for O3 and aqi site is just the site of measurement. 

We will assign a note from the pentatonic scale to each one. We will vary the time frequency depending on the measurement. The options are a 1/4 notes for fair values. 

ppm_levels = { 'good': }

In [18]:
ppm_levels = {'good':33,'fair': 66,'poor': 100,'very poor': 150, 'hazardous': 200}

33

In [None]:
## Musical part



In [55]:
def calculate_key(midi_note_number, key_type):
    '''
    midi_note_number: based on the MIDI standard
    key_type options:
        major
        minor_natural
        major_pentatonic
    '''
    key = [midi_note_number]
    
    if key_type.lower() == 'major':
        # Major scales are formed by taking the follwing steps = Whole, Whole, Half, Whole, Whole, Whole, Half
        # Where whole steps are two semtitones and Half are one semitone. 
        # Increasing the midi number means an increase of one semitone
        steps = ['W','W','H','W','W','W','H']
        for i in range(len(steps)):
            # If we need to take a whole step, increase by two, otherwise by one
            if steps[i] == 'W':
                midi_note_number = midi_note_number + 2
                key.append(midi_note_number)
            else:
                midi_note_number = midi_note_number + 1
                key.append(midi_note_number)
    elif key_type.lower() == 'minor_natural':
        # Minor scales are formed by taking the follwing steps = Whole, Whole, Half, Whole, Whole, Whole, Half
        # Whole, Half, Whole, Whole, Half, Whole, Whole
        steps = ['W','H', 'W', 'W', 'H' ,'W', 'W']
        for i in range(len(steps)):
            # If we need to take a whole step, increase by two, otherwise by one
            if steps[i] == 'W':
                midi_note_number = midi_note_number + 2
                key.append(midi_note_number)
            else:
                midi_note_number = midi_note_number + 1
                key.append(midi_note_number)   
    elif key_type.lower() == 'major_pentatonic':
        # Major scales are formed by taking the follwing steps = Whole, Whole, Half, Whole, Whole, Whole, Half
        # Where whole steps are two semtitones and Half are one semitone. 
        # Increasing the midi number means an increase of one semitone
        steps = ['W','W','H','W','W','W','H']
        for i in range(len(steps)):
            # If we need to take a whole step, increase by two, otherwise by one
            if steps[i] == 'W':
                midi_note_number = midi_note_number + 2
                key.append(midi_note_number)
            else:
                midi_note_number = midi_note_number + 1
                key.append(midi_note_number)
            # only select 1,2,3,5,7 degrees (due to 9 indexing we substract one)
        key = [key[i] for i in [0,1,2,4,6]]

    return key

In [56]:
key = calculate_key(65,'major_pentatonic')

[65, 67, 69, 72, 76]

In [None]:
def calculate_times(value):
    ppm_levels = {'good':33,'fair': 66,'poor': 100,'very poor': 150, 'hazardous': 200}
    if value < ppm_levels['good']:
        note_time = 4
    elif value < ppm_levels['fair']:
        note_time = 8
    elif value < ppm_levels['poor']:
        note_time = 16
    elif value  < ppm_levels['very poor']:
        note_time = 32
    elif value > ppm_levels['very poor']:
        note_time = 64
    return note_time



In [99]:
midi_file = MidiFile()
i = 0
iters = 0
for column in air_monthly_mean:
    track = MidiTrack()
    midi_file.tracks.append(track)
    # calculate how frequently you will insert it 
    # determine what note it is going to be assigned
    for value in air_monthly_mean[column]:
        # determine how many you will insert, which is the number provided by calculate
        times = calculate_times(value)     
        for insert in range(times):
            # if we want to insert it four times, we need to space it such that it fits in one measure, we do that by 
            # dividing 480, the number of ticks per measure over the number of times
            track.append(Message('note_on', note=key[i], time=int(960/times)))
            track.append(Message('note_off', note=key[i], velocity=127, time=int(960/times)))
            track.append(Message('program_change', program=12+i))

    i += 1
    track.append(MetaMessage('set_tempo',tempo=1000000))
        
# we calculate the note time value. We will fill each measure (4 pulses) with each note time
# 1/4 notes will be inserted four times
# 1/8 notes will be inserted 8 times

# 1/4 = good
# 1/8 = fair
# 1/16 = poor
# 1/32 = very poor
# 1/64 = hazardous

midi_file.save('air6000.midi')

65