# Data Sonification and Visualization of COVID-19 Cases in Austria #

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import sounddevice as sd
from IPython.display import HTML, display
import ipywidgets as widgets

In [2]:
# Load the case timeline; the file is semicolon-separated.
# NOTE(review): columns written with a decimal comma (e.g. SiebenTageInzidenzFaelle)
# come back as strings with this call and are cleaned in a later cell.
df = pd.read_csv('CovidFaelle_Timeline.csv', sep=';')

In [3]:
# Preview the first rows of the raw table.
df.head(20)

Unnamed: 0,Time,Bundesland,BundeslandID,AnzEinwohner,AnzahlFaelle,AnzahlFaelleSum,AnzahlFaelle7Tage,SiebenTageInzidenzFaelle,AnzahlTotTaeglich,AnzahlTotSum,AnzahlGeheiltTaeglich,AnzahlGeheiltSum
0,26.02.2020 00:00:00,Burgenland,1,294436,0,0,0,0,0,0,0,0
1,26.02.2020 00:00:00,Kärnten,2,561293,0,0,0,0,0,0,0,0
2,26.02.2020 00:00:00,Niederösterreich,3,1684287,0,0,0,0,0,0,0,0
3,26.02.2020 00:00:00,Oberösterreich,4,1490279,0,0,0,0,0,0,0,0
4,26.02.2020 00:00:00,Salzburg,5,558410,0,0,0,0,0,0,0,0
5,26.02.2020 00:00:00,Steiermark,6,1246395,0,0,0,0,0,0,0,0
6,26.02.2020 00:00:00,Tirol,7,757634,0,0,0,0,0,0,0,0
7,26.02.2020 00:00:00,Vorarlberg,8,397139,0,0,0,0,0,0,0,0
8,26.02.2020 00:00:00,Wien,9,1911191,1,1,1,5232339,0,0,0,0
9,26.02.2020 00:00:00,Österreich,10,8901064,1,1,1,1123461,0,0,0,0


In [4]:
# List the column names that were read from the CSV.
df.columns

Index(['Time', 'Bundesland', 'BundeslandID', 'AnzEinwohner', 'AnzahlFaelle',
       'AnzahlFaelleSum', 'AnzahlFaelle7Tage', 'SiebenTageInzidenzFaelle',
       'AnzahlTotTaeglich', 'AnzahlTotSum', 'AnzahlGeheiltTaeglich',
       'AnzahlGeheiltSum'],
      dtype='object')

In [5]:
# The incidence values use a decimal comma (e.g. '0,05232339'), so the column
# was read as strings (dtype object) instead of floats.
df['SiebenTageInzidenzFaelle'].head(20)

0              0
1              0
2              0
3              0
4              0
5              0
6              0
7              0
8     0,05232339
9     0,01123461
10             0
11             0
12             0
13             0
14             0
15             0
16             0
17             0
18     0,1569702
19    0,03370384
Name: SiebenTageInzidenzFaelle, dtype: object

### Cleaning the Data ###

In [6]:
# cleaning data - the incidence column uses a decimal comma and is therefore
# read as strings. Swap ',' for '.' (literal replace, no regex) and convert the
# result to float so the column is numeric rather than text.
df['SiebenTageInzidenzFaelle'] = (
    df['SiebenTageInzidenzFaelle']
    .str.replace(',', '.', regex=False)
    .astype(float)
)
# dropping every row whose BundeslandID is a multiple of 10: ID 10 is the
# aggregate row for the whole of Austria, not a single federal state.
# reset_index() renumbers the rows 0..n-1 and keeps the old labels in an
# 'index' column; later cells rely on the new labels running in blocks of nine.
df = df.loc[(df['BundeslandID'] % 10 != 0), :].reset_index()
df.head(10)

Unnamed: 0,index,Time,Bundesland,BundeslandID,AnzEinwohner,AnzahlFaelle,AnzahlFaelleSum,AnzahlFaelle7Tage,SiebenTageInzidenzFaelle,AnzahlTotTaeglich,AnzahlTotSum,AnzahlGeheiltTaeglich,AnzahlGeheiltSum
0,0,26.02.2020 00:00:00,Burgenland,1,294436,0,0,0,0.0,0,0,0,0
1,1,26.02.2020 00:00:00,Kärnten,2,561293,0,0,0,0.0,0,0,0,0
2,2,26.02.2020 00:00:00,Niederösterreich,3,1684287,0,0,0,0.0,0,0,0,0
3,3,26.02.2020 00:00:00,Oberösterreich,4,1490279,0,0,0,0.0,0,0,0,0
4,4,26.02.2020 00:00:00,Salzburg,5,558410,0,0,0,0.0,0,0,0,0
5,5,26.02.2020 00:00:00,Steiermark,6,1246395,0,0,0,0.0,0,0,0,0
6,6,26.02.2020 00:00:00,Tirol,7,757634,0,0,0,0.0,0,0,0,0
7,7,26.02.2020 00:00:00,Vorarlberg,8,397139,0,0,0,0.0,0,0,0,0
8,8,26.02.2020 00:00:00,Wien,9,1911191,1,1,1,0.05232339,0,0,0,0
9,10,27.02.2020 00:00:00,Burgenland,1,294436,0,0,0,0.0,0,0,0,0


In [7]:
# smallest and largest AnzahlFaelle value per Bundesland
cases_by_state = df.groupby('Bundesland')['AnzahlFaelle']
dfAmin, dfAmax = cases_by_state.min(), cases_by_state.max()
dfAmin, dfAmax

(Bundesland
 Burgenland          0
 Kärnten             0
 Niederösterreich    0
 Oberösterreich      0
 Salzburg            0
 Steiermark          0
 Tirol               0
 Vorarlberg          0
 Wien                1
 Name: AnzahlFaelle, dtype: int64,
 Bundesland
 Burgenland           307
 Kärnten              843
 Niederösterreich    1133
 Oberösterreich      2257
 Salzburg             805
 Steiermark          1097
 Tirol               1006
 Vorarlberg           803
 Wien                1934
 Name: AnzahlFaelle, dtype: int64)

In [8]:
# defining the amplitude values for the audio engine from the number of cases (AnzahlFaelle) and scaling the values to go between 0 and 1
# scikit-learn - StandardScaler for a future iteration

In [9]:
# Sorted list of the distinct BundeslandIDs. The order matters: position j in
# the lists below is later paired with per-voice parameters and a pan position,
# and the iteration order of a set is not guaranteed.
IDS = sorted(df['BundeslandID'].unique().tolist())

# One array of AnzahlFaelle values per BundeslandID, in the order of IDS.
dfAmps = [np.array(df.loc[(df['BundeslandID'] == bid), 'AnzahlFaelle']) for bid in IDS]
# Each array divided by its own maximum, so every series peaks at 1.
dfAmpsn = [amps / amps.max() for amps in dfAmps]

### Creating the Audio Engine ###

In [10]:
# List the audio devices known to sounddevice.
sd.query_devices()

  0 BlackHole 16ch, Core Audio (16 in, 16 out)
> 1 MacBook Pro Mikrofon, Core Audio (1 in, 0 out)
< 2 MacBook Pro Lautsprecher, Core Audio (0 in, 2 out)
  3 QuickTime Player Input, Core Audio (16 in, 16 out)
  4 Screen Record w/Audio, Core Audio (0 in, 2 out)

In [19]:
# just setting this up for screen-rec: make the screen-recording device the default.
# NOTE(review): this device name is taken from the device list printed above and
# will differ on other machines - adjust it or skip this cell there.
sd.default.device = 'Screen Record w/Audio, Core Audio'

In [12]:
# length of one note in seconds
dur = 0.125
# envelope timing: the first 10 % of a note is attack, the remaining 90 % is decay
a, d = dur * 0.1, dur * 0.9


# simple sine-oscillator
def sine(frq, a, d, sr=44100):
    """Render one sine tone shaped by a linear attack/decay envelope.

    Parameters
    ----------
    frq : float or array
        Frequency in Hz. An array must hold one value per output sample.
    a : float
        Attack time in seconds; the envelope rises linearly from 0 to 0.5.
    d : float
        Decay time in seconds; the envelope falls linearly from 0.5 to 0.
    sr : int, optional
        Sample rate in samples per second (default 44100).

    Returns
    -------
    numpy.ndarray
        1-D array of round(sr * a) + round(sr * d) samples.
    """
    # Compute the two segment lengths once so envelope and time axis agree.
    n_attack = int(round(sr * a))
    n_decay = int(round(sr * d))
    env = np.concatenate((np.linspace(0, 0.5, n_attack), np.linspace(0.5, 0, n_decay)))
    t = np.arange(n_attack + n_decay) / sr
    return np.sin(2 * np.pi * frq * t) * env


# pause - function. for future iterations of this piece.
def pause(note):
    """Return silence: an all-zero array with the shape and dtype of `note`."""
    return np.zeros_like(note)


# applying frequency modulation to the oscillator
def fm(freq, ratio, a, d):
    """Return `freq` with a sine modulator added to it.

    The modulator is a `sine` tone at `freq * ratio` with attack `a` and
    decay `d`, so the result is an array of per-sample frequency values.
    """
    modulator = sine(freq * ratio, a, d)
    return freq + modulator


# simple panning - algorithm
def panner(x, angle):
    """Pan a mono signal into a two-channel (left, right) array.

    x is a numpy array holding the mono signal, angle is the pan angle in
    radians. For a 1-D `x` of length N the result has shape (N, 2).
    """
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    left_gain = np.sqrt(2)/2.0 * (cos_a - sin_a)
    right_gain = np.sqrt(2)/2.0 * (cos_a + sin_a)
    channels = (left_gain * x, right_gain * x)
    # dstack puts the channels on the last axis; [0] drops the leading axis
    return np.dstack(channels)[0]


#### Defining the Score ####

In [13]:
# tuning in just intonation to generate an alienating feeling

In [14]:
def score(basefreq):
    """Build one audio track per entry of `dfAmpsn`.

    Voice j plays a single FM tone at basefreq * basemult[j] * intervals[j]
    (modulator ratio fmratios[j]) once per amplitude value in dfAmpsn[j], each
    repetition scaled by that amplitude, all repetitions concatenated.

    Uses the module-level `dfAmpsn`, `a`, `d`, `sine` and `fm`.

    Returns a list of 1-D numpy arrays, one per voice.
    """
    basemult = [1, 2, 1, 4, 2, 4, 8, 8, 16]
    intervals = [1, 1, (3/4), 1, (15/8), (9/5), (6/5), (2/3), (3/4)]
    fmratios = [5, 2, 2, 1.25, 2, 2, 1.25, 1.5, 2.5]
    score1 = []
    for j, amps in enumerate(dfAmpsn):
        # The tone depends only on the voice, not on the data point, so it is
        # rendered once here instead of once per amplitude value.
        tone = sine(fm(basefreq * basemult[j] * intervals[j], fmratios[j], a, d), a, d)
        score1.append(np.concatenate([tone * amp for amp in amps]))
    return score1

### Data Animation ###

In [15]:
# The seaborn style sheets were renamed to 'seaborn-v0_8-*' in newer matplotlib
# releases and the old names are no longer accepted there; use whichever name
# this installation provides.
darkgrid_style = 'seaborn-v0_8-darkgrid' if 'seaborn-v0_8-darkgrid' in plt.style.available else 'seaborn-darkgrid'
plt.style.use(darkgrid_style)
fig, ax = plt.subplots(figsize=(20, 10))
# x positions 1..9 are the BundeslandIDs; y spans all AnzahlFaelle values plus headroom
ax.set_xlim(0, 10)
ax.set_ylim(0, df['AnzahlFaelle'].max() + 100)
ax.set_xticks(np.arange(1, 10))
# rows 0..8 hold one row per Bundesland, in the same order as the tick positions
ax.set_xticklabels(df.loc[0:8, 'Bundesland'])
# placeholder artist (large yellow dots) whose data is replaced on every frame
line, = ax.plot(0, 0, 'yo', ms=35)

def animation_frame(i):
    """Move the dots to the nine rows of `df` labelled i .. i+8.

    x is the BundeslandID, y the AnzahlFaelle value of each row. Returns a
    one-element tuple with the updated artist.
    """
    block = df.loc[i:i+8]
    line.set_data(block['BundeslandID'], block['AnzahlFaelle'])
    return (line,)

# One frame per block of nine rows (start labels 0, 9, 18, ...). The frame
# interval is the note length `dur` converted to milliseconds, so one frame
# lasts as long as one note.
animation = FuncAnimation(fig, func=animation_frame, frames=np.arange(0, len(df), 9), interval=dur * 1000, blit=False, repeat=False)

# Close the current figure - presumably so the static, empty plot is not shown
# in this cell's output.
plt.close()

## Executing Data Sonification and Visualization ##

In [16]:
# one might need to execute the following cell twice (or more times) to get audio and animation to sync.

In [17]:
# slider for setting the base frequency (integer, 55 to 220, initial value 110);
# its current value is read by the playback cell below
slider = widgets.IntSlider(value=110, min=55, max=220, step=1, description='basefreq')
display(slider)

IntSlider(value=110, description='basefreq', max=220, min=55)

In [21]:
basefreq = slider.value
# pan position of the first voice in degrees; each further voice sits 80/9 degrees further along
pan1 = -30
pan_step = 80 / 9
# overall gain applied to the summed mix
volscal = 0.45

# Render the score once (previously it was rebuilt for every voice) and sum
# the panned voices into one stereo mix.
voices = score(basefreq)
mix = sum(panner(voice, np.radians(pan1 + pan_step * k)) for k, voice in enumerate(voices))
# 44100 matches the sample rate the tones are synthesised at
sd.play(mix * volscal, 44100)


HTML(animation.to_html5_video())