In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import numpy.ma as ma

def call_dataset(game_name, data_dir='datasets'):
    """Load and clean the tweet dataset of one game.

    Reads ``{data_dir}/{game_name}_tweets_datatset.csv`` and returns a new
    DataFrame, sorted from the earliest to the latest tweet, with:

    * auto-generated YouTube share tweets and giveaway tweets removed
      (rows whose ``text`` is missing are removed as well);
    * a trailing carriage return stripped from the ``sentiment`` column;
    * the first column of the file dropped;
    * ``sentiment scores`` parsed from its string form into a numpy array;
    * ``tweet id`` converted to ``str``;
    * ``sentiment score`` (index of the highest score) and
      ``sentiment confidence`` (the highest score itself) added;
    * ``created at`` parsed into datetimes and ``date`` (calendar day) added.

    Parameters
    ----------
    game_name : str
        Prefix of the CSV file name, e.g. ``'xy'``.
    data_dir : str, optional
        Directory that holds the CSV files. Defaults to ``'datasets'``.

    Returns
    -------
    pandas.DataFrame
        The cleaned tweets with a fresh 0..n-1 index.
    """
    # The file name keeps the spelling used on disk ("datatset").
    # lineterminator='\n' leaves any '\r' inside the last field of each line,
    # which is why the 'sentiment\r' clean-up below exists.
    data = pd.read_csv(f'{data_dir}/{game_name}_tweets_datatset.csv', lineterminator='\n')

    # Phrases that mark tweets to be excluded from the analysis.
    excluded_phrases = (
        'I liked a YouTube',
        'I liked a @YouTube video',
        'I added a video to a @YouTube',
        'I added a video to a YouTube',
        'Giveaway',
    )
    text = data['text']
    is_excluded = pd.Series(False, index=data.index)
    for phrase in excluded_phrases:
        # regex=False: the phrases are literals. na=True: a missing text counts
        # as excluded, which makes the handling of NaN explicit instead of
        # depending on how `NaN == False` evaluates.
        is_excluded |= text.str.contains(phrase, regex=False, na=True).astype(bool)
    # .copy() so the column assignments below act on an independent frame
    # rather than on a slice of the frame returned by read_csv.
    data = data.loc[~is_excluded].copy()

    # removing \r in 'sentiment'
    if 'sentiment\r' in data.columns:
        # .str.replace skips missing values instead of raising on them.
        data['sentiment'] = data['sentiment\r'].str.replace('\r', '', regex=False)
        data = data.drop(columns=['sentiment\r'])

    # removing first column
    data = data.drop(columns=data.columns[0])

    # changing 'sentiment scores' from str to ndarray; [1:-1] strips the
    # surrounding brackets before the whitespace-separated numbers are parsed
    data['sentiment scores'] = data['sentiment scores'].apply(
        lambda raw: np.fromstring(raw[1:-1], sep=' ')
    )

    # changing 'tweet id' to str
    # NOTE(review): if the ids are read as float64, ids above 2**53 have
    # already lost precision at this point - confirm against the raw CSV.
    data['tweet id'] = data['tweet id'].apply(lambda value: str(int(value)))

    # adding sentiment scores: winning class index and its score
    best_index = [scores.argmax() for scores in data['sentiment scores']]
    data['sentiment score'] = best_index
    data['sentiment confidence'] = [
        scores[index] for scores, index in zip(data['sentiment scores'], best_index)
    ]

    # changing 'created at' date from str to datetime
    created = data['created at'].map(lambda stamp: stamp.removesuffix('+00:00'))
    data['created at'] = pd.to_datetime(created, format='%Y-%m-%d %H:%M:%S')
    data['date'] = data['created at'].dt.date

    # sorting from earliest to latest; mergesort is stable, so tweets sharing
    # a timestamp keep their file order
    data = data.sort_values(by='created at', kind='mergesort').reset_index(drop=True)
    return data

In [2]:
# Load the cleaned tweets of every game; each key is the prefix of that
# game's CSV file and the frames are bound in the same order as the keys.
(xy, oras, sunmoon, ultrasm, letsgo,
 swsh, swshdlc, bdsp, arceus, sv) = map(call_dataset, (
    'xy', 'oras', 'sunmoon', 'ultrasm', 'letsgo',
    'swsh', 'swshdlc', 'bdsp', 'arceus', 'sv',
))

In [3]:
def sentiment_figure(game_dataset):
    """Count the tweets of each sentiment label on each day.

    Groups ``game_dataset`` by its ``sentiment`` and ``date`` columns and
    returns a DataFrame holding those two columns plus a ``size`` column
    with the number of rows in every group.
    """
    by_label_and_day = game_dataset.groupby(by=['sentiment', 'date'], as_index=False)
    return by_label_and_day.size()

In [13]:
def calculate_metrics(df):
    """Summarise the sentiment split of one tweet DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``text`` and a ``sentiment`` column; the labels
        counted are ``'Positive'``, ``'Neutral'`` and ``'Negative'``.

    Returns
    -------
    list
        ``[total_number_of_tweets, positive_percentage, neutral_percentage,
        negative_percentage]``. Percentages are relative to the number of
        rows in ``df`` and are NaN when ``df`` has no rows.
    """
    total_number_of_tweets = len(df['text'])
    sentiment = df['sentiment']
    row_count = len(sentiment)
    # One pass over the column instead of one boolean scan per label.
    label_counts = sentiment.value_counts()

    def percentage(label):
        # A share of zero rows is undefined; report NaN rather than raising
        # ZeroDivisionError so one empty dataset cannot abort a whole batch.
        if row_count == 0:
            return float('nan')
        return 100 * int(label_counts.get(label, 0)) / row_count

    return [
        total_number_of_tweets,
        percentage('Positive'),
        percentage('Neutral'),
        percentage('Negative'),
    ]

In [16]:
# One metrics row per game, in the same order as the frames listed here.
frames_list = [xy, oras, sunmoon, ultrasm, letsgo, swsh, swshdlc, bdsp, arceus, sv]
metrics_list = [calculate_metrics(frame) for frame in frames_list]

metrics_columns = [
    'total_number_of_tweets',
    'positive_percentage',
    'neutral_percentage',
    'negative_percentage',
]
metrics_data = pd.DataFrame(metrics_list, columns=metrics_columns)

In [19]:
# Persist the per-game metrics table; the row index is not written.
metrics_data.to_csv('datasets/games_metrics.csv', index=False)

In [20]:
# Read the saved metrics table back from disk and display it.
placeholder_baby = pd.read_csv('datasets/games_metrics.csv')
placeholder_baby

Unnamed: 0,total_number_of_tweets,positive_percentage,neutral_percentage,negative_percentage
0,198330,48.537791,40.936822,10.525387
1,67004,37.220166,55.519073,7.260761
2,320430,45.593421,46.438848,7.967731
3,31397,41.054878,50.842437,8.102685
4,66758,68.78127,24.17538,7.043351
5,202527,46.857456,40.280555,12.861989
6,84750,41.303835,46.154572,12.541593
7,77113,75.637052,18.864523,5.498424
8,181977,50.575073,35.891349,13.533578
9,102457,46.948476,44.782689,8.268835


In [22]:
# Positive-tweet percentage of the first row. A single label-based lookup
# replaces `.loc[0][1]`, which indexed a string-labelled row Series by
# integer position (deprecated in pandas 2.1).
placeholder_baby.loc[0, 'positive_percentage']

48.5377905511017

In [25]:
import json

# Load the game metadata; JSON files are read as UTF-8 regardless of the
# platform's default encoding.
with open('datasets/games_metadata.json', encoding='utf-8') as file:
    game_metadata = json.load(file)

# json.load already returns decoded Python objects. Decode a second time only
# when the file stores a JSON document wrapped inside a JSON string; calling
# json.loads on a dict or list would raise TypeError.
if isinstance(game_metadata, str):
    game_metadata = json.loads(game_metadata)
print(game_metadata)

NameError: name 'd' is not defined