In [24]:
# Normalization of the ticket prices

import pandas as pd

df = pd.read_csv('all_festivals_ticket_prices.csv')

# Group by festival and calculate the average price for all ticket types
df_avg_prices = df.groupby('Festival')['Preço'].mean().reset_index()
df_avg_prices.rename(columns={'Preço': 'Average Price'}, inplace=True)

# Calculate the minimum and maximum average prices
min_price = df_avg_prices['Average Price'].min()
max_price = df_avg_prices['Average Price'].max()

# Normalize the average prices with reverse normalization
df_avg_prices['Normalized Price'] = ((max_price - df_avg_prices['Average Price']) / (max_price - min_price)) * 100

# Sort by the 'Average Price' in descending order
df_avg_prices_sorted = df_avg_prices.sort_values(by='Normalized Price', ascending=False)

# Display it
display(df_avg_prices_sorted)

Unnamed: 0,Festival,Average Price,Normalized Price
4,Vodafone Paredes de Coura,87.318333,100.0
1,NOS Alive,128.714286,67.555681
0,MEO Sudoeste,132.666667,64.457979
3,Super Bock Super Rock,132.85,64.314291
2,Rock in Rio,214.909091,0.0


In [28]:
# Normalization of sentiment scores

import pandas as pd
import os

# Read sentiment analysis data for each festival
sentiment_files = ['comments_analysis_nos_alive.csv', 'comments_analysis_RIR.csv', 'comments_analysis_sbsr.csv', 'comments_analysis_sudoeste.csv', 'comments_analysis_vodafone.csv']  
df_avg_sentiment_list = []

# Calculate average sentiment score for each festival and put it on a list df_avg_sentiment_list
for file in sentiment_files:
    df = pd.read_csv(file)
    avg_sentiment = df['Sentiment Score'].mean()  # Calculate average sentiment score
    festival_name = os.path.splitext(os.path.basename(file))[0].split('_')[-1]  # Extract festival name from the filename
    df_avg_sentiment_list.append({'Festival': festival_name, 'Average Sentiment Score': avg_sentiment}) 

# Create a dataframe from the list of average sentiment scores
df_avg_sentiment = pd.DataFrame(df_avg_sentiment_list)

# Normalize the average sentiment scores
min_sentiment = df_avg_sentiment['Average Sentiment Score'].min()
max_sentiment = df_avg_sentiment['Average Sentiment Score'].max()
df_avg_sentiment['Normalized Sentiment'] = ((df_avg_sentiment['Average Sentiment Score'] - min_sentiment) / (max_sentiment - min_sentiment)) * 100

# Replace the festival names that were extracted from the file with the full names
df_avg_sentiment['Festival'] = df_avg_sentiment['Festival'].replace({
    'alive': 'NOS Alive',
    'RIR': 'Rock in Rio',
    'sbsr': 'Super Bock Super Rock',
    'sudoeste': 'MEO Sudoeste',
    'vodafone': 'Vodafone Paredes de Coura'
})

# Sort the dataframe by 'Average Sentiment Score' in descending order
df_avg_sentiment_sorted = df_avg_sentiment.sort_values(by='Normalized Sentiment', ascending=False)

# Display it
display(df_avg_sentiment_sorted)


Unnamed: 0,Festival,Average Sentiment Score,Normalized Sentiment
1,Rock in Rio,0.053254,100.0
4,Vodafone Paredes de Coura,0.038785,62.807908
3,MEO Sudoeste,0.031614,44.375205
2,Super Bock Super Rock,0.018895,11.683008
0,NOS Alive,0.01435,0.0


In [2]:
# Normalization of Engagement Rates

import pandas as pd

df_twitter = pd.read_csv("festivals_twitter_engagement.csv")
df_instagram = pd.read_csv("festivals_instagram_engagement.csv")

# Rename the columns that were 'Engagement Rate' to 'Twitter Engagement Rate' or 'Instagram Engagement Rate' to indicate the platform
df_twitter.rename(columns={'Engagement Rate': 'Twitter Engagement Rate'}, inplace=True)
df_instagram.rename(columns={'Engagement Rate': 'Instagram Engagement Rate'}, inplace=True)

# Merge the two dataframes on the 'Festival' column
df_engagement = pd.merge(df_twitter, df_instagram, on='Festival')

# Calculate the average engagement score for each festival
df_engagement['Average Engagement'] = (df_engagement['Twitter Engagement Rate'] + df_engagement['Instagram Engagement Rate']) / 2

# Normalize the average engagement scores
min_engagement = df_engagement['Average Engagement'].min()
max_engagement = df_engagement['Average Engagement'].max()
df_engagement['Normalized Engagement'] = ((df_engagement['Average Engagement'] - min_engagement) / (max_engagement - min_engagement)) * 100

# Sort the dataframe by 'Average Engagement' in descending order
df_engagement_sorted = df_engagement.sort_values(by='Normalized Engagement', ascending=False)

# Display it
display(df_engagement_sorted)

Unnamed: 0,Festival,Twitter Engagement Rate,Instagram Engagement Rate,Average Engagement,Normalized Engagement
2,Rock in Rio,0.592022,252.92152,126.756771,100.0
0,MEO Sudoeste,1.944388,128.627012,65.2857,37.819307
4,Vodafone Paredes de Coura,0.137769,116.523081,58.330425,30.78374
3,Super Bock Super Rock,0.32346,92.961248,46.642354,18.960743
1,NOS Alive,0.349921,55.446072,27.897996,0.0


In [6]:
# Normalization of Onstrategy consulting
import pandas as pd

data = pd.read_csv(r'C:\Users\a1927\Desktop\MESTRADO\Semester 2\BIG DATA\Group project\onstrategy data.csv', delimiter=';',  decimal=',')


# Normalize using the max-min technique
data['avg_normalized'] = (data['AVG'] - data['AVG'].min()) / (data['AVG'].max() - data['AVG'].min()) * 100

data.head()


Unnamed: 0,MUSIC EVENTS,2021,2022,AVG,avg_normalized
0,NOS ALIVE,73.5,76.4,74.95,100.0
1,ROCK IN RIO,74.1,73.6,73.85,68.115942
2,MEO SUDOESTE,72.5,72.8,72.65,33.333333
3,SUPER BOCK SUPER ROCK,70.8,72.2,71.5,0.0
4,VODAFONE PAREDES DE COURA,71.4,71.7,71.55,1.449275


In [4]:
# Normalization of popularity data

sorted_artists = pd.read_csv(r"C:\Users\a1927\Downloads\sorted_artists.xls")

# Group the artists by festival and calculate the average number of listeners
avg_listeners_by_festival = sorted_artists.groupby('Festival')['listeners_lastfm'].mean()

# Normalize the average listeners per festival using the Min-Max technique
min_value = avg_listeners_by_festival.min()
max_value = avg_listeners_by_festival.max()
normalized_listeners = ((avg_listeners_by_festival - min_value) / (max_value - min_value)) * 100

normalized_df = pd.DataFrame({'Festival': normalized_listeners.index, 'Normalized Listeners Avg': normalized_listeners.values})
normalized_df_sorted = normalized_df.sort_values(by='Normalized Listeners Avg', ascending=False)

# Add the average Listener column
normalized_df_sorted['Average Listener'] = avg_listeners_by_festival.values

# Reorganize
normalized_df_sorted = normalized_df_sorted[['Festival', 'Average Listener', 'Normalized Listeners Avg']]

display(normalized_df_sorted)


Unnamed: 0,Festival,Average Listener,Normalized Listeners Avg
1,NOS Alive,193108.8,100.0
2,Rock in Rio,1756518.4,45.937069
3,Super Bock Super Rock,880732.0,3.808622
0,MEO Sudoeste,198276.95,3.489589
4,Vodafone Paredes de Coura,136579.6,0.0
