# Python Assignment 3 – Part 1
### MCT4001 – Team B
Arvid, Jakob, Joseph, Kristian
## Loading libraries

In [None]:
%config IPCompleter.greedy=True

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import soundfile as sf
import librosa
import librosa.display
import pandas as pd
import time

## Metrics - start

In [None]:
# Start timer: wall-clock timestamp (in seconds) that the last cell of the
# notebook subtracts from `end` to report the total run time
start = time.time()

## Making a Class and Functions

In [None]:
class Song:
    """A song's audio plus the data needed to cut it into onset-based segments.

    Attributes:
        audio: the song's samples (a NumPy array; ``.size`` is used as its length).
        harmonicAudio: harmonic part of the song, sliced at the same sample
            positions as ``audio``.
        onsets: onset positions, in samples, at which the audio is cut.
        sr: sample rate handed to librosa when the spectral centroid is computed.
        segments: the ``Segment`` objects produced by ``segmentAudio``.
    """

    def __init__(self, a, ha, o, sr=22050):
        """Store the audio, its harmonic part and the onset positions.

        Args:
            a: audio samples of the whole song.
            ha: harmonic component of the same song.
            o: onset positions in samples.
            sr: sample rate of ``a``/``ha`` in Hz. Defaults to 22050, the rate
                librosa assumes when none is given, so existing three-argument
                callers get unchanged centroid values.
        """
        self.audio = a
        self.harmonicAudio = ha
        self.onsets = o
        self.sr = sr
        self.segments = []

    def segmentAudio(self):
        """Cut the song at every onset and store the pieces in ``self.segments``.

        The list is rebuilt from scratch on every call, so running the method
        (or the notebook cell calling it) twice does not duplicate segments.
        """
        self.segments = []
        total_samples = self.audio.size
        sample_start = 0

        # slicing based on onset detection and appending the slices together
        for onset in self.onsets:
            # Skip onsets that do not move forward (an onset at sample 0 or a
            # repeated onset); they would only produce empty slices.
            if onset > sample_start:
                self.segments.append(self._makeSegment(sample_start, onset))
                sample_start = onset

        # add the final segment: everything after the last onset, but only if
        # at least one sample is left (an onset at the very end leaves nothing)
        if sample_start < total_samples:
            self.segments.append(self._makeSegment(sample_start, total_samples))

    def _makeSegment(self, start, stop):
        """Build the segment object for the samples in ``[start, stop)``."""
        # Copy both slices so a segment never aliases the song's own arrays
        segment = np.array(self.audio[start:stop])
        harmonicSegment = np.array(self.harmonicAudio[start:stop])
        return self.createSegmentClass(segment, harmonicSegment)

    def createSegmentClass(self, segment, harmonicSegment):
        """Return a ``Segment`` holding ``segment`` and its mean spectral centroid.

        The centroid is measured on ``harmonicSegment`` and averaged over all
        values librosa returns for it.
        """
        # Calculate the mean value of the spectral centroid
        spectralCentroid = librosa.feature.spectral_centroid(
            y=harmonicSegment, sr=self.sr, n_fft=1024)
        spectralCentroidMean = np.average(spectralCentroid)
        return Segment(segment, spectralCentroidMean)

In [None]:
# Define segment class
class Segment(Song):
    """One slice of a song together with its mean spectral centroid.

    NOTE(review): the class derives from ``Song`` but does not call
    ``Song.__init__``, so the ``Song`` attributes (audio, harmonicAudio,
    onsets, segments) are not set on a ``Segment``.
    """

    def __init__(self, seg, scm):
        # seg: the samples of this slice; scm: its mean spectral centroid
        self.segment, self.spectralCentroidMean = seg, scm

## Input

In [None]:
# Load our two audio files

# Sample rate requested from librosa.load for both files
sr=44100

# Paths to the two source recordings
song1 = './Files/cullum_shortened.wav'
song2 = './Files/rihanna_shortened.wav'

# Each call yields the samples and a sample rate; `sr` is rebound to the
# returned rate on every call
cullum, sr = librosa.load(song1, sr=sr)
rihanna, sr = librosa.load(song2, sr=sr)


## Analysis / Segmentation

In [None]:
# Split each audio file into harmonic and percussive arrays using librosa.effects.hpss
# (the first returned array is kept as the harmonic part, the second as the percussive part)
cullum_harmonic, cullum_percussive = librosa.effects.hpss(cullum)
rihanna_harmonic, rihanna_percussive = librosa.effects.hpss(rihanna)

In [None]:
# Run onset detection on the percussive arrays for each audio file.
# - The signal is passed as `y=`: newer librosa releases accept it by keyword
#   only, and the keyword form also works on older releases.
# - `sr=sr` tells librosa the rate the files were actually loaded at; without
#   it the detector assumes librosa's default rate instead.
# - units='samples' returns the onsets as sample positions, which is what the
#   slicing in Song.segmentAudio expects.
onset_cullum = librosa.onset.onset_detect(y=cullum_percussive, sr=sr, backtrack=True, units='samples')
onset_rihanna = librosa.onset.onset_detect(y=rihanna_percussive, sr=sr, backtrack=True, units='samples')

In [None]:
# Report the segment count per file, taken as one more than the number of onsets
print(f"Number of segments for Cullum:\t{onset_cullum.size + 1}")
print(f"Number of segments for Rihanna:\t{onset_rihanna.size + 1}")

In [None]:
# Wrap each song (full audio, harmonic part, onsets) in a Song object ...
cullumClass = Song(cullum, cullum_harmonic, onset_cullum)
rihannaClass = Song(rihanna, rihanna_harmonic, onset_rihanna)

# ... then let every Song cut itself into Segment objects
for song in (rihannaClass, cullumClass):
    song.segmentAudio()

#### Adding Dataframes

In [None]:
# Function to fill pandas dataframes with information

def fill_dataframe(df, segments, str):
    """Write one row per segment into `df`; the frame is modified in place.

    Columns written, in this order:
        'Scm'            each segment's spectralCentroidMean
        'id'             the label `str`, repeated on every row
        'Seg_length'     number of samples in the segment
        'Segment_start'  running sample position where the segment begins
        'Segment_end'    running sample position where the segment stops

    Nothing is returned.
    """
    items = list(segments)
    lengths = [len(item.segment) for item in items]

    # The running total of the lengths is each segment's end position
    ends = []
    position = 0
    for length in lengths:
        position += length
        ends.append(position)

    # Every segment starts where the previous one ended; the first starts at 0
    starts = ([0] + ends)[:len(ends)]

    df['Scm'] = [item.spectralCentroidMean for item in items]
    df['id'] = str
    df['Seg_length'] = lengths
    df['Segment_start'] = starts
    df['Segment_end'] = ends
        
    


In [None]:
# One empty dataframe per song, each filled from that song's segments
df_rihanna, df_cullum = pd.DataFrame(), pd.DataFrame()

for frame, song, label in (
    (df_rihanna, rihannaClass, 'rihanna'),
    (df_cullum, cullumClass, 'cullum'),
):
    fill_dataframe(frame, song.segments, label)



In [None]:
# Stack the Cullum rows on top of the Rihanna rows. The row labels are left
# untouched; the sequencing step later uses them to look segments up again.
frames = [df_cullum, df_rihanna]
result = pd.concat(frames)

# Order every segment from the lowest to the highest Spectral Centroid Mean
final = result.sort_values('Scm', ascending=True)

print(f"Total number of segments in output file: {len(final)}")

## Output to .csv

In [None]:
# Print to csv file

# Work out where each segment lands in the re-sequenced audio: walking the
# rows in sorted order, a segment ends at the running total of the lengths ...
sample_end = []
sample_start_counter = 0
for seg_length in final['Seg_length'].tolist():
    sample_start_counter += seg_length
    sample_end.append(sample_start_counter)

# ... and starts where the previous one ended (the first one starts at 0)
sample_start = ([0] + sample_end)[:len(sample_end)]

# Store both positions as new columns
final['New_Segment_start'] = sample_start
final['New_Segment_end'] = sample_end

# Write the table to disk
final.to_csv('output.csv')


## Sequencing

In [None]:
# Adding the segments back together in the sorted order of the dataframe.
# `index` is the row label a segment kept from its own song's dataframe and is
# used as the position in that song's `segments` list.
songs_by_id = {'rihanna': rihannaClass, 'cullum': cullumClass}
ordered_segments = [
    songs_by_id[song_id].segments[index].segment
    for index, song_id in final['id'].items()
    if song_id in songs_by_id  # rows with any other id are skipped, as before
]

# Join everything with a single concatenate instead of one np.append per
# segment (np.append copies the whole growing array on every call).
# The leading empty array keeps the result a float64 array, exactly as the
# np.append chain produced, and makes an empty `final` give an empty array.
combined_audio = np.concatenate([np.array([])] + ordered_segments)


In [None]:
# Sanity check: the output should hold as many samples as both inputs together

input_samples = rihanna.size + cullum.size
output_samples = combined_audio.size

print("Total number of samples in both input audio files:\t{:n} samples".format(input_samples))
print("Total number of samples in the output audio file:\t{:n} samples".format(output_samples))

# Same comparison expressed as playing time
print("\nTime of both input audio files: \t\t\t{:.4f} seconds".format(input_samples / sr))
print("Time of output audio file: \t\t\t\t{:.4f} seconds".format(output_samples / sr))

#### The re-sequenced version

In [None]:
# Visualize and play the newly sequenced audio

# Choosing style and colors for the plot.
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-<name>'
# and later releases dropped the old names, so use the new name whenever the
# installed Matplotlib offers it and fall back to the old one otherwise.
for style_name in ('seaborn-pastel', 'seaborn-white'):
    renamed_style = style_name.replace('seaborn-', 'seaborn-v0_8-', 1)
    plt.style.use(renamed_style if renamed_style in plt.style.available else style_name)
fig, ax = plt.subplots(figsize=(16, 4), facecolor='#d3fbb5', dpi=150.0)
ax.plot(combined_audio, color='#faa39d')
ax.set_facecolor('#e6f0fe')

# Setting labels
ax.set_title("Don't Stop The [combined] Music", fontsize='24', weight='roman', y=1.05, family='monospace')
ax.set_xlabel('time (m)', fontsize='x-large', family='monospace')
ax.set_ylabel('amplitude', fontsize='x-large', family='monospace')
ax.set_xmargin(0.005)

# One x-axis tick per full minute of audio: labels 1, 2, ... up to the number
# of whole minutes, positioned at the matching sample index.
tick_stop = ((len(combined_audio)/sr)//60)+1
s_ticks = np.arange(1, int(tick_stop), 1)
# Deriving the positions from the labels guarantees both have the same length
s_ticks_value = s_ticks * sr * 60

# Showing minutes instead of samples
ax.set_xticks(s_ticks_value)
ax.set_xticklabels(s_ticks)

# Displaying the plot
fig.tight_layout()
plt.show()

# Save the result as 16-bit PCM and offer it for playback in the notebook
sf.write('./Files/output.wav', combined_audio, sr, 'PCM_16')
ipd.Audio(combined_audio, rate=sr)

## Visualization

#### How the segments are re-sequenced
Here is a visualization of the first 100 segments after re-sequencing.

Two reasons as to why we're not visualizing every segment of the song:
1. It takes too much CPU time to plot the whole song this way.
2. It looks nicer when 'zoomed' in, as you can see the source more clearly.

In [None]:
# Visualizing the first 100 segments

# Choosing style and colors for the plot
fig, ax = plt.subplots(figsize=(24, 6), facecolor='#d3fbb5', dpi=150)

# Setting up for iteration
songs_by_id = {'rihanna': rihannaClass, 'cullum': cullumClass}
colors_by_id = {'rihanna': '#faa39d', 'cullum': '#fad29d'}
max_segments = 100
x_offset = 0
counter = 0

# Iterating through the first 100 segments to plot by color-code
for index, song_id in final['id'].items():
    # Stop once exactly `max_segments` segments have been drawn
    if counter >= max_segments:
        break
    if song_id not in songs_by_id:
        continue
    segment = songs_by_id[song_id].segments[index].segment
    # Draw the segment at its own x-range instead of padding it with zeros up
    # to the offset: same placement, but no ever-growing arrays and no extra
    # baseline drawn across the earlier segments.
    ax.plot(np.arange(x_offset, x_offset + len(segment)), segment, color=colors_by_id[song_id])
    x_offset += len(segment)
    # Every plotted segment counts, whichever song it comes from
    counter += 1

# Setting labels and design
ax.set_title("Don't Stop The Music [segments]", fontsize='36', weight='roman', y=1.02, family='monospace')
ax.set_xmargin(0)
ax.axis('off')

# Adding legend and saving to .png before plotting
plt.legend(['– Rihanna', '– Cullum'], loc=(0.90, 0.95), fontsize='22', labelcolor=['#f8756c', '#f8bc6c'], handlelength=0)
plt.savefig('output.png', pad_inches=1)
plt.show()

#### The average (mean) of the Spectral Centroid
Visualizing the mean spectral centroid per segment for each artist.

In [None]:
# Plot the 'Scm' of every segment, one line per artist

fig, ax = plt.subplots(figsize=(24, 10), facecolor='#d3fbb5', dpi=150)

# Settings shared by both artists' markers and by both axis labels
marker_outline = dict(mec='k', mew=0.4, ms=8)
label_font = dict(fontsize='22', family='monospace')

# Setting labels and design
ax.set_title("Mean Central Spectroid pr Segment", fontsize='36', weight='roman', y=1.03, family='monospace')
ax.plot(df_rihanna['Scm'], color='#faa39d', marker='p', mfc='#f8756c', label='Rihanna', **marker_outline)
ax.plot(df_cullum['Scm'], color='#fad29d', marker='^', mfc='#f6b154', label='Cullum', **marker_outline)
ax.set_xlabel('Segment (#)', **label_font)
ax.set_ylabel('Frequency (Hz)', **label_font)
ax.set_xmargin(0.005)
ax.set_facecolor('#e6f0fe')

# Logarithmic frequency axis with ticks at fixed, plainly labelled frequencies
y_ticks = [250, 500, 1000, 2000, 4000]
ax.set_yscale('log')
ax.set_yticks(y_ticks)
ax.set_yticklabels(y_ticks)

# Adding legend and saving to .png before plotting
ax.legend(fontsize='24', markerscale=1.8)
plt.savefig('output2.png')
plt.show()

## Statistics

In [None]:
# Making the Spectral Centroid Mean ('Scm' column) the source for statistics
r_stat = df_rihanna['Scm']
c_stat = df_cullum['Scm']

# Mean of each artist
r_mu = r_stat.mean()
c_mu = c_stat.mean()

# Standard deviation of each artist
r_sig = r_stat.std()
c_sig = c_stat.std()

# Max of each artist
r_max = r_stat.max()
c_max = c_stat.max()

# Min of each artist
r_min = r_stat.min()
c_min = c_stat.min()

# Range of each artist
r_range = r_max - r_min
c_range = c_max - c_min

# Number of segments (values in the statistic) of each artist.
# These are counts, not histogram bin settings.
r_bin = len(r_stat)
c_bin = len(c_stat)

print("Rihanna:")
print("Min:    {:.2f}".format(r_min))
print("Max:   {:.2f}".format(r_max))
print("Range: {:.2f}".format(r_range))

print("\nCullum:")
print("Min:    {:.2f}".format(c_min))
print("Max:   {:.2f}".format(c_max))
print("Range: {:.2f}".format(c_range))

#### Statistics plot:
Showing the mean, standard deviation and range of Rihanna's & Cullum's Spectral Centroid Mean 

In [None]:
# Visualizing

def _normal_curve(x, mu, sig):
    """Normal density with mean `mu` and standard deviation `sig`, evaluated at `x`."""
    return ((1 / (np.sqrt(2 * np.pi) * sig)) *
            np.exp(-0.5 * (1 / sig * (x - mu))**2))


# Settings shared by both artists
hist_style = dict(bins=200, density=True, histtype='stepfilled', alpha=0.75)
curve_style = dict(ls='--', lw=3)
range_style = dict(alpha=0.9, ls=':', lw=1.8)
axis_font = dict(fontsize='18', family='monospace')

fig, ax = plt.subplots(figsize=(16, 8), facecolor='#d3fbb5')

fig.suptitle("H i s t o g r a m", size='36', y=1.035, family='monospace')

# Common title carrying mean and standard deviation of both artists
ax.set_title(r"Rihanna: $\mu={:.2f}$, $\sigma={:.2f}$   —   Cullum: $\mu={:.2f}$, $\sigma={:.2f}$".format(r_mu, r_sig, c_mu, c_sig), fontsize='20', y=1.04, family='monospace')
ax.set_facecolor('#fef9e6')

# Rihanna: histogram, fitted normal curve over the bin edges, and dotted
# vertical lines at the minimum and maximum value
r_n, r_bins, r_patches = ax.hist(r_stat, color='#f65f54', **hist_style)
r_y = _normal_curve(r_bins, r_mu, r_sig)
ax.plot(r_bins, r_y, color='#f33224', label='Rihanna', **curve_style)
ax.vlines((r_min, r_max), 0, r_n.max(), color='#f33224', label='R-range', **range_style)

# Cullum: the same three layers in his colors
c_n, c_bins, c_patches = ax.hist(c_stat, color='#54ebf6', **hist_style)
c_y = _normal_curve(c_bins, c_mu, c_sig)
ax.plot(c_bins, c_y, color='#6ca8f7', label='Cullum', **curve_style)
ax.vlines((c_min, c_max), 0, c_n.max(), color='#6ca8f7', label='C-range', **range_style)

ax.set_xlabel('Spectral Centroid Mean', **axis_font)
ax.set_ylabel('Probability density', **axis_font)

ax.set_xmargin(0.015)
ax.legend(fontsize='20')
plt.show()

## Metrics - stop
Displaying the run time.

In [None]:
# Stop timer: second wall-clock timestamp; the next cell reports `end - start`
end = time.time()

In [None]:
# Split the elapsed time into whole minutes and the remaining seconds
minutes, seconds = divmod(end - start, 60)

print("Total run time:\t{m:1.0f} minutes and {s:.2f} seconds".format(m = minutes, s = seconds))