In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Tourism

In [2]:
# Load the datasets
footballers_path = '../results/Footballers/feature_importance.xlsx'
interview_path = '../results/interview/Topics_Correlation.xlsx'
online_path = '../results/online/feature_importances_Tourism.xlsx'

# Read the Excel files
footballers_data = pd.read_excel(footballers_path)
interview_data = pd.read_excel(interview_path)
online_data = pd.read_excel(online_path)

In [3]:
# Standardizing column names for consistency
footballers_data = footballers_data.rename(columns={"name": "Feature", "Importance": "Score"})
interview_data = interview_data.rename(columns={"Topic": "Feature", "Pearson Correlation": "Score"})
online_data = online_data.rename(columns={online_data.columns[0]: "Feature", online_data.columns[1]: "Score"})

In [4]:
# Keeping only necessary columns
footballers_data = footballers_data[["Feature", "Score"]]
interview_data = interview_data[["Feature", "Score"]]
online_data = online_data[["Feature", "Score"]]

In [5]:
# Adding a source column to identify data origin
footballers_data["Source"] = "Footballers"
interview_data["Source"] = "Interview"
online_data["Source"] = "Online"

In [6]:
# Combine all data into a single dataframe
combined_data = pd.concat([footballers_data, interview_data, online_data], ignore_index=True)

In [7]:
# Normalizing the scores to a scale of 0 to 1
scaler = MinMaxScaler()
combined_data["Normalized_Score"] = scaler.fit_transform(combined_data[["Score"]])

In [8]:
combined_data

Unnamed: 0,Feature,Score,Source,Normalized_Score
0,Promoting gender equality,0.030525,Footballers,0.030575
1,Using sports to promote positive messages abou...,0.025514,Footballers,0.025555
2,Helping to challenge gender stereotypes and pr...,0.023663,Footballers,0.023702
3,peace,0.021558,Footballers,0.021593
4,Encouraging them to participate in peace initi...,0.020016,Footballers,0.020049
...,...,...,...,...
166,Suggestions_sports,0.000116,Online,0.000116
167,Suggestions_the,0.001523,Online,0.001526
168,Suggestions_tourism,0.000139,Online,0.000139
169,Suggestions_with,0.000060,Online,0.000060


In [9]:
# Grouping by feature and calculating the mean normalized score across sources
final_features = (
    combined_data.groupby("Feature")["Normalized_Score"]
    .mean()
    .reset_index()
    .sort_values(by="Normalized_Score", ascending=False)
)

# Display the top 20 most important features
print("Top 10 Features:")
print(final_features.head(20))

Top 10 Features:
                                               Feature  Normalized_Score
4                                    Cultural Exchange          0.986886
109              Peacebuilding and Tourism Development          0.982973
9                                            Education          0.979367
107                                      Peacebuilding          0.974424
48                                               Peace          0.972683
108  Peacebuilding Challenges and Tourism Developme...          0.969894
144                                Women's Empowerment          0.963001
142                                Tourism Development          0.962598
118                                   Sports Diplomacy          0.960117
7     Development of Sports and Tourism Infrastructure          0.959914
141                                            Tourism          0.939304
110                     Political and Legal Challenges          0.933882
5                       Cultural a

In [10]:
final_features.to_excel('../results/combine/Tourism.xlsx')

# Peace

In [11]:
# Load the datasets
footballers_path = '../results/Footballers/feature_importance.xlsx'
interview_path = '../results/interview/Topics_Correlation.xlsx'
online_path = '../results/online/feature_importances_Peace.xlsx'

# Read the Excel files
footballers_data = pd.read_excel(footballers_path)
interview_data = pd.read_excel(interview_path)
online_data = pd.read_excel(online_path)

In [12]:
# Standardizing column names for consistency
footballers_data = footballers_data.rename(columns={"name": "Feature", "Importance": "Score"})
interview_data = interview_data.rename(columns={"Topic": "Feature", "Pearson Correlation": "Score"})
online_data = online_data.rename(columns={online_data.columns[0]: "Feature", online_data.columns[1]: "Score"})

In [13]:
# Keeping only necessary columns
footballers_data = footballers_data[["Feature", "Score"]]
interview_data = interview_data[["Feature", "Score"]]
online_data = online_data[["Feature", "Score"]]

In [14]:
# Adding a source column to identify data origin
footballers_data["Source"] = "Footballers"
interview_data["Source"] = "Interview"
online_data["Source"] = "Online"

In [15]:
# Combine all data into a single dataframe
combined_data = pd.concat([footballers_data, interview_data, online_data], ignore_index=True)

In [16]:
# Normalizing the scores to a scale of 0 to 1
scaler = MinMaxScaler()
combined_data["Normalized_Score"] = scaler.fit_transform(combined_data[["Score"]])

In [17]:
combined_data

Unnamed: 0,Feature,Score,Source,Normalized_Score
0,Promoting gender equality,0.030525,Footballers,0.030575
1,Using sports to promote positive messages abou...,0.025514,Footballers,0.025555
2,Helping to challenge gender stereotypes and pr...,0.023663,Footballers,0.023702
3,peace,0.021558,Footballers,0.021593
4,Encouraging them to participate in peace initi...,0.020016,Footballers,0.020049
...,...,...,...,...
152,Suggestions_sports,0.000038,Online,0.000038
153,Suggestions_the,0.004998,Online,0.005006
154,Suggestions_tourism,0.000946,Online,0.000947
155,Suggestions_with,0.002205,Online,0.002208


In [18]:
# Grouping by feature and calculating the mean normalized score across sources
final_features = (
    combined_data.groupby("Feature")["Normalized_Score"]
    .mean()
    .reset_index()
    .sort_values(by="Normalized_Score", ascending=False)
)

# Display the top 10 most important features
print("Top 10 Features:")
print(final_features.head(20))

Top 10 Features:
                                               Feature  Normalized_Score
4                                    Cultural Exchange          0.986886
52               Peacebuilding and Tourism Development          0.982973
9                                            Education          0.979367
50                                       Peacebuilding          0.974424
48                                               Peace          0.972683
51   Peacebuilding Challenges and Tourism Developme...          0.969894
130                                Women's Empowerment          0.963001
85                                 Tourism Development          0.962598
61                                    Sports Diplomacy          0.960117
7     Development of Sports and Tourism Infrastructure          0.959914
84                                             Tourism          0.939304
53                      Political and Legal Challenges          0.933882
5                       Cultural a

In [18]:
final_features.to_excel('../results/combine/Peace.xlsx')

# Media

In [19]:
# Load the datasets
footballers_path = '../results/Footballers/feature_importance.xlsx'
interview_path = '../results/interview/Topics_Correlation.xlsx'
online_path = '../results/online/feature_importances_Media.xlsx'

# Read the Excel files
footballers_data = pd.read_excel(footballers_path)
interview_data = pd.read_excel(interview_path)
online_data = pd.read_excel(online_path)

In [20]:
# Standardizing column names for consistency
footballers_data = footballers_data.rename(columns={"name": "Feature", "Importance": "Score"})
interview_data = interview_data.rename(columns={"Topic": "Feature", "Pearson Correlation": "Score"})
online_data = online_data.rename(columns={online_data.columns[0]: "Feature", online_data.columns[1]: "Score"})

In [21]:
# Keeping only necessary columns
footballers_data = footballers_data[["Feature", "Score"]]
interview_data = interview_data[["Feature", "Score"]]
online_data = online_data[["Feature", "Score"]]

In [22]:
# Adding a source column to identify data origin
footballers_data["Source"] = "Footballers"
interview_data["Source"] = "Interview"
online_data["Source"] = "Online"

In [23]:
# Combine all data into a single dataframe
combined_data = pd.concat([footballers_data, interview_data, online_data], ignore_index=True)

In [24]:
# Normalizing the scores to a scale of 0 to 1
scaler = MinMaxScaler()
combined_data["Normalized_Score"] = scaler.fit_transform(combined_data[["Score"]])

In [25]:
combined_data

Unnamed: 0,Feature,Score,Source,Normalized_Score
0,Promoting gender equality,0.030525,Footballers,0.030575
1,Using sports to promote positive messages abou...,0.025514,Footballers,0.025555
2,Helping to challenge gender stereotypes and pr...,0.023663,Footballers,0.023702
3,peace,0.021558,Footballers,0.021593
4,Encouraging them to participate in peace initi...,0.020016,Footballers,0.020049
...,...,...,...,...
189,Suggestions_sports,0.000658,Online,0.000659
190,Suggestions_the,0.000014,Online,0.000014
191,Suggestions_tourism,0.000006,Online,0.000006
192,Suggestions_with,0.001068,Online,0.001070


In [26]:
# Grouping by feature and calculating the mean normalized score across sources
final_features = (
    combined_data.groupby("Feature")["Normalized_Score"]
    .mean()
    .reset_index()
    .sort_values(by="Normalized_Score", ascending=False)
)

# Display the top 10 most important features
print("Top 10 Features:")
print(final_features.head(20))

Top 10 Features:
                                               Feature  Normalized_Score
4                                    Cultural Exchange          0.986886
89               Peacebuilding and Tourism Development          0.982973
9                                            Education          0.979367
87                                       Peacebuilding          0.974424
28                                               Peace          0.972683
88   Peacebuilding Challenges and Tourism Developme...          0.969894
167                                Women's Empowerment          0.963001
122                                Tourism Development          0.962598
98                                    Sports Diplomacy          0.960117
7     Development of Sports and Tourism Infrastructure          0.959914
121                                            Tourism          0.939304
90                      Political and Legal Challenges          0.933882
5                       Cultural a

In [27]:
final_features.to_excel('../results/combine/Media.xlsx')

# challenges 

In [28]:
# Load the datasets
footballers_path = '../results/Footballers/feature_importance.xlsx'
interview_path = '../results/interview/Topics_Correlation.xlsx'
online_path = '../results/online/feature_importances_challenge.xlsx'

# Read the Excel files
footballers_data = pd.read_excel(footballers_path)
interview_data = pd.read_excel(interview_path)
online_data = pd.read_excel(online_path)

In [29]:
# Standardizing column names for consistency
footballers_data = footballers_data.rename(columns={"name": "Feature", "Importance": "Score"})
interview_data = interview_data.rename(columns={"Topic": "Feature", "Pearson Correlation": "Score"})
online_data = online_data.rename(columns={online_data.columns[0]: "Feature", online_data.columns[1]: "Score"})

In [30]:
# Keeping only necessary columns
footballers_data = footballers_data[["Feature", "Score"]]
interview_data = interview_data[["Feature", "Score"]]
online_data = online_data[["Feature", "Score"]]

In [31]:
# Adding a source column to identify data origin
footballers_data["Source"] = "Footballers"
interview_data["Source"] = "Interview"
online_data["Source"] = "Online"

In [32]:
# Combine all data into a single dataframe
combined_data = pd.concat([footballers_data, interview_data, online_data], ignore_index=True)

In [33]:
# Normalizing the scores to a scale of 0 to 1
scaler = MinMaxScaler()
combined_data["Normalized_Score"] = scaler.fit_transform(combined_data[["Score"]])

In [34]:
combined_data

Unnamed: 0,Feature,Score,Source,Normalized_Score
0,Promoting gender equality,0.030525,Footballers,0.030575
1,Using sports to promote positive messages abou...,0.025514,Footballers,0.025555
2,Helping to challenge gender stereotypes and pr...,0.023663,Footballers,0.023702
3,peace,0.021558,Footballers,0.021593
4,Encouraging them to participate in peace initi...,0.020016,Footballers,0.020049
...,...,...,...,...
189,Suggestions_sports,0.000658,Online,0.000659
190,Suggestions_the,0.000014,Online,0.000014
191,Suggestions_tourism,0.000006,Online,0.000006
192,Suggestions_with,0.001068,Online,0.001070


In [35]:
# Grouping by feature and calculating the mean normalized score across sources
final_features = (
    combined_data.groupby("Feature")["Normalized_Score"]
    .mean()
    .reset_index()
    .sort_values(by="Normalized_Score", ascending=False)
)

# Display the top 10 most important features
print("Top 10 Features:")
print(final_features.head(20))

Top 10 Features:
                                               Feature  Normalized_Score
4                                    Cultural Exchange          0.986886
89               Peacebuilding and Tourism Development          0.982973
9                                            Education          0.979367
87                                       Peacebuilding          0.974424
28                                               Peace          0.972683
88   Peacebuilding Challenges and Tourism Developme...          0.969894
167                                Women's Empowerment          0.963001
122                                Tourism Development          0.962598
98                                    Sports Diplomacy          0.960117
7     Development of Sports and Tourism Infrastructure          0.959914
121                                            Tourism          0.939304
90                      Political and Legal Challenges          0.933882
5                       Cultural a

In [36]:
final_features.to_excel('../results/combine/Challenges.xlsx')