## Attempt to sonify flow of syrian refugees
* Data from UN : http://popstats.unhcr.org/en/asylum_seekers_monthly



In [9]:
# My usual setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas 
from datetime import datetime
import seaborn as sns
import glob
from os.path import basename
sns.set_context('poster') 

In [10]:
# miditime libs
from miditime.miditime import MIDITime 

## Preprocess data 
* group by month and year (multiple entries per country of origin and even several for each then)
* convert data type
* MAKE SURE SORTED

In [11]:
df = pandas.read_csv('../data/SYRIA-unhcr_popstats_export_asylum_seekers_monthly_2010-2017-06.csv', skiprows=2)

In [12]:
df.head()

Unnamed: 0,Country / territory of asylum/residence,Origin,Year,Month,Value
0,Hungary,Syrian Arab Rep.,2010,January,2
1,Hungary,Syrian Arab Rep.,2010,February,3
2,Hungary,Syrian Arab Rep.,2010,March,1
3,Hungary,Syrian Arab Rep.,2010,April,3
4,Hungary,Syrian Arab Rep.,2010,June,2


In [13]:
#convert to numeric
df['value_numeric'] = pandas.to_numeric(df.Value, errors="coerce")

In [14]:
# not sure why i have to apply first to convert to string, but here we are.
df['datetime'] = pandas.to_datetime(df.apply(lambda x: str(x['Year']) + x['Month'], axis=1), format="%Y%B")

In [15]:
monthly_totals = df.groupby('datetime').value_numeric.sum()

In [16]:
monthly_totals.head()

datetime
2010-01-01    499.0
2010-02-01    635.0
2010-03-01    519.0
2010-04-01    433.0
2010-05-01    472.0
Name: value_numeric, dtype: float64

In [17]:
len(monthly_totals.index)

91

In [18]:
MIN = monthly_totals.min()
MAX = monthly_totals.max()

In [19]:
print(MIN)
print(MAX)

433.0
102089.0


In [None]:
monthly_totals = monthly_totals.sort_index()

### Chart data

In [None]:
monthly_totals.plot()
plt.title("Total refugees from Syria, all origins")

## Start MIDI code

Instantiate the class with a tempo (120bpm is the default), an output file destination, the number of seconds you want to represent a year in the final song (default is 5 sec/year), the base octave (C5 is middle C, so the default is 5, and how many octaves you want your output to range over (default is 1).

In [21]:
# HACK COZ I COULDNT FIGURE OUT HOW TO CONVERT SERIES TO DICT with PROPER FORMAT
syria_data = [{'datetime':x, 'magnitude': y} for x, y in zip(monthly_totals.index, monthly_totals.values)]


Convert your date/time data into an integer, like days since the epoch (Jan. 1, 1970). You can use the days_since_epoch() helper method, or not:

In [22]:
syria_data_epoched = [{'days_since_epoch': mymidi.days_since_epoch(d['datetime']), 'magnitude': d['magnitude']} for d in syria_data] 

Convert your integer date/time to something reasonable for a song. For example, at 120 beats per minute, you'll need to scale the data down a lot to avoid a very long song if your data spans years. This uses the seconds_per_year attribute you set at the top, so if your date is converted to something other than days you may need to do your own conversion. But if your dataset spans years and your dates are in days (with fractions is fine), use the beat() helper method.

In [23]:
syria_data_timed = [{'beat': mymidi.beat(d['days_since_epoch']), 'magnitude': d['magnitude']} for d in syria_data_epoched]

In [24]:
syria_data

[{'datetime': Timestamp('2010-01-01 00:00:00'), 'magnitude': 499.0},
 {'datetime': Timestamp('2010-02-01 00:00:00'), 'magnitude': 635.0},
 {'datetime': Timestamp('2010-03-01 00:00:00'), 'magnitude': 519.0},
 {'datetime': Timestamp('2010-04-01 00:00:00'), 'magnitude': 433.0},
 {'datetime': Timestamp('2010-05-01 00:00:00'), 'magnitude': 472.0},
 {'datetime': Timestamp('2010-06-01 00:00:00'), 'magnitude': 455.0},
 {'datetime': Timestamp('2010-07-01 00:00:00'), 'magnitude': 459.0},
 {'datetime': Timestamp('2010-08-01 00:00:00'), 'magnitude': 604.0},
 {'datetime': Timestamp('2010-09-01 00:00:00'), 'magnitude': 602.0},
 {'datetime': Timestamp('2010-10-01 00:00:00'), 'magnitude': 634.0},
 {'datetime': Timestamp('2010-11-01 00:00:00'), 'magnitude': 557.0},
 {'datetime': Timestamp('2010-12-01 00:00:00'), 'magnitude': 604.0},
 {'datetime': Timestamp('2011-01-01 00:00:00'), 'magnitude': 645.0},
 {'datetime': Timestamp('2011-02-01 00:00:00'), 'magnitude': 596.0},
 {'datetime': Timestamp('2011-03-0

In [25]:
start_time = syria_data_timed[0]['beat']

Set up some functions to scale your other variable (magnitude in our case) to match your desired mode/key and octave range. There are helper methods to assist this scaling, very similar to a charting library like D3. You can choose a linear or logarithmic scale.

## Notes on transforms
Velocity is volume, basically. 
Add two new transforms: mag_to_beat and mag_to_pitch

In [31]:
def mag_to_pitch_tuned(magnitude):
    # Where does this data point sit in the domain of your data? (I.E. the min magnitude is 3, the max in 5.6). In this case the optional 'True' means the scale is reversed, so the highest value will return the lowest percentage.
    #scale_pct = mymidi.linear_scale_pct(MIN, MAX, magnitude)

    # Another option: Linear scale, reverse order
    scale_pct = mymidi.linear_scale_pct(MIN, MAX, magnitude, True)

    # Another option: Logarithmic scale, reverse order
    #scale_pct = mymidi.log_scale_pct(MIN, MAX, magnitude, True)

    # Pick a range of notes. This allows you to play in a key.
    c_major = ['C', 'D', 'E', 'F', 'G', 'A', 'B']

    #Find the note that matches your data point
    note = mymidi.scale_to_note(scale_pct, c_major)

    #Translate that note to a MIDI pitch
    midi_pitch = mymidi.note_to_midi_pitch(note)

    return midi_pitch

In [None]:
#TRANFORM MAGNITUDE TO BEAT, NOT PITCH
def mag_to_beat(magnitude):
    # Where does this data point sit in the domain of your data? (I.E. the min magnitude is 3, the max in 5.6). In this case the optional 'True' means the scale is reversed, so the highest value will return the lowest percentage.
    #scale_pct = mymidi.linear_scale_pct(MIN, MAX, magnitude)

    # Another option: Linear scale, reverse order
    scale_pct = mymidi.linear_scale_pct(MIN, MAX, magnitude, True)

    # Another option: Logarithmic scale, reverse order
    #scale_pct = mymidi.log_scale_pct(MIN, MAX, magnitude, True)

    # Pick a range of notes. This allows you to play in a key.
    c_major = ['C', 'D', 'E', 'F', 'G', 'A', 'B']

    #Find the note that matches your data point
    note = mymidi.scale_to_note(scale_pct, c_major)

    #Translate that note to a MIDI pitch
    midi_pitch = mymidi.note_to_midi_pitch(note)

    return midi_pitch

In [77]:
# try transform to velocity

note_list = [] 

for d in syria_data_timed:

    note_list.append([
        d['beat'] - start_time,
        mag_to_pitch_tuned(d['magnitude']),
        100, #int(mymidi.linear_scale_pct(10, 200, d['magnitude'])),  # velocity from 0 to 255
        1  # duration, in beats
    ]) 

[BEAT, PITCH, VELOCITY (VOLUME), DURATION_OF_NOTE]

In [78]:
note_list

[[0.0, 71, 100, 1],
 [0.8500000000000227, 71, 100, 1],
 [1.6200000000000045, 71, 100, 1],
 [2.4599999999999795, 71, 100, 1],
 [3.2900000000000205, 71, 100, 1],
 [4.1299999999999955, 71, 100, 1],
 [4.9599999999999795, 71, 100, 1],
 [5.800000000000011, 71, 100, 1],
 [6.649999999999977, 71, 100, 1],
 [7.470000000000027, 71, 100, 1],
 [8.319999999999993, 71, 100, 1],
 [9.139999999999986, 71, 100, 1],
 [9.990000000000009, 71, 100, 1],
 [10.839999999999975, 71, 100, 1],
 [11.610000000000014, 71, 100, 1],
 [12.45999999999998, 71, 100, 1],
 [13.279999999999973, 71, 100, 1],
 [14.129999999999995, 71, 100, 1],
 [14.949999999999989, 71, 100, 1],
 [15.800000000000011, 71, 100, 1],
 [16.649999999999977, 71, 100, 1],
 [17.470000000000027, 71, 100, 1],
 [18.319999999999993, 71, 100, 1],
 [19.139999999999986, 71, 100, 1],
 [19.99000000000001, 71, 100, 1],
 [20.839999999999975, 71, 100, 1],
 [21.629999999999995, 71, 100, 1],
 [22.480000000000018, 71, 100, 1],
 [23.30000000000001, 71, 100, 1],
 [24.1499

## Make MIDI file

In [79]:
mymidi = MIDITime(120, '../audio/syria-2010-2017-FREQ.mid', 5, 5, 1) 
# Add a track with those notes
mymidi.add_track(note_list)

# Output the .mid file
mymidi.save_midi()

71 0.0 1 100
71 0.8500000000000227 1 100
71 1.6200000000000045 1 100
71 2.4599999999999795 1 100
71 3.2900000000000205 1 100
71 4.1299999999999955 1 100
71 4.9599999999999795 1 100
71 5.800000000000011 1 100
71 6.649999999999977 1 100
71 7.470000000000027 1 100
71 8.319999999999993 1 100
71 9.139999999999986 1 100
71 9.990000000000009 1 100
71 10.839999999999975 1 100
71 11.610000000000014 1 100
71 12.45999999999998 1 100
71 13.279999999999973 1 100
71 14.129999999999995 1 100
71 14.949999999999989 1 100
71 15.800000000000011 1 100
71 16.649999999999977 1 100
71 17.470000000000027 1 100
71 18.319999999999993 1 100
71 19.139999999999986 1 100
71 19.99000000000001 1 100
71 20.839999999999975 1 100
71 21.629999999999995 1 100
71 22.480000000000018 1 100
71 23.30000000000001 1 100
71 24.149999999999977 1 100
71 24.970000000000027 1 100
71 25.819999999999993 1 100
71 26.670000000000016 1 100
71 27.49000000000001 1 100
71 28.339999999999975 1 100
71 29.160000000000025 1 100
71 30.00999999999