# Cleaning Survey Questions (Multiple Choice)

This notebook cleans the multiple-choice survey responses in preparation for unsupervised clustering.

## Importing packages and simplifying questions

In [1]:
# Import packages used by this notebook.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
from kmodes.kmodes import KModes
# NOTE(review): called with no category, this hides *every* warning for the
# rest of the session, presumably including pandas' SettingWithCopyWarning
# and FutureWarning. Consider narrowing the filter once the notebook is stable.
warnings.filterwarnings("ignore")

In [2]:
# Map of raw survey headers -> short snake_case column names.
# Headers ending in ".1" / ".2" are repeats of an earlier question text
# (the suffix is already part of the header as it is looked up here).
# 'index' is the column produced by reset_index() and becomes the user id.
column_renames = {
    'index': 'user_id',
    'Timestamp': 'timestamp',
    'Have you, at any point in time, played gacha games?': 'user_game_status',
    'How long have you been playing gacha games?': 'user_game_duration',

    # "_new" question set
    'What is your current favorite gacha game?': 'game_title_new',
    'What mobile operating system do you use to play this game?': 'game_os_new',
    'Which version of the game do you play?': 'game_version_new',
    'How did you first hear about/start playing this game?': 'game_intro_new',
    'Estimate how much time you spend on this game per week.': 'game_play_consistency_new',
    "How does this game make you feel when you're playing it?": 'game_feel_new',
    'What do you like the most about this game? What motivates you to continue playing this game?': 'game_likemost_new',
    'What do you like the least about this game? Is there any area you think this game should be improved in?': 'game_likeleast_new',
    'Have you completed any in-app transactions (using real world currency)?': 'game_transaction_new',
    'What is the reason you chose to be Free-to-Play (F2P) for this game?': 'game_f2p_new',
    'Based on your level of spending in this game, What type of paying gamer would you categorize yourself?': 'game_fishchart_new',
    'What would you say is the biggest difference for your in-game experience in comparison to a F2P (free-to-play) player?': 'game_spender_new',
    'TO THE BEST OF YOUR KNOWLEDGE, how much have you spent in total on this gacha game? (in US Dollars)': 'game_spending_new',
    'TO THE BEST OF YOUR KNOWLEDGE, how much do you spend on this gacha game during a month with a high-profile event (anniversary, new years, etc.) (in US Dollars)': 'game_spending_event_new',

    # unsuffixed question set (second copy of the same questions)
    'Name your current favorite gacha game.': 'game_title',
    'What mobile operating system do you use to play this game?.1': 'game_os',
    'Which version of the game do you play?.1': 'game_version',
    'How did you first hear about/start playing this game?.1': 'game_intro',
    'Estimate how much time you spend on this game per week..1': 'game_play_consistency',
    "How does this game make you feel when you're playing it?.1": 'game_feel',
    'What do you like the most about this game? What motivates you to continue playing this game?.1': 'game_likemost',
    'What do you like the least about this game? Is there any area you think this game should be improved in?.1': 'game_likeleast',
    'Have you completed any in-app transactions (using real world currency)?.1': 'game_transaction',
    'What is the reason you chose to be Free-to-Play (F2P) for this game?.1': 'game_f2p',
    'Based on your level of spending in this game, What type of paying gamer would you categorize yourself?.1': 'game_fishchart',
    'What would you say is the biggest difference for your in-game experience in comparison to a F2P (free-to-play) player?.1': 'game_spender',
    'TO THE BEST OF YOUR KNOWLEDGE, how much have you spent on this gacha game in total? (in US Dollars)': 'game_spending',
    'TO THE BEST OF YOUR KNOWLEDGE, how much do you spend on this gacha game during a month with a high-profile event (anniversary, new years, etc.) (in US Dollars).1': 'game_spending_event',

    # "_past" question set
    'Do you have a once-favorite gacha game that you decided to stop playing?': 'user_status_past',
    'What was your once-favorite gacha game that you decided to stop playing?': 'game_title_past',
    'You decided to stop playing this game due to the following factors:': 'game_stop_past',
    'What did you miss the most about your once-favorite gacha game? How did make you feel when you were playing it? ': 'game_miss_past',
    'What did you like the least about this game? How did that make you feel?': 'game_likeleast_past',
    'How long had you been playing this once-favorite gacha game before you stopped?': 'game_duration_past',
    'Have you completed any in-app transactions (using real world currency)?.2': 'game_transaction_past',
    'TO THE BEST OF YOUR KNOWLEDGE, how much have you spent in this game in total? (in US Dollar)': 'game_spending_past',

    # never-played and other-game questions
    'Is there a particular reason why you have not played gacha games?': 'user_notplay',
    'Have you played other types and formats of games?': 'user_notplay_othergame_status',
    'If you answered yes above, list some of the game types you play or have played before.': 'user_notplay_othergame',
    'Have you played other types and formats of games besides gacha?': 'user_othergame_status',
    'If you answered yes above, list some of the game types you play or have played before..1': 'user_othergame',

    # demographics
    'What is your age range?': 'age',
    'What is your gender?': 'gender',
    'What is your PRIMARY content source (2D culture format)?': 'content',
    'Which continent are you from?': 'continent',
}

# Load the survey export, turn the row index into a user id column and
# apply the short names. Headers absent from the file are silently ignored
# by rename().
master_df = (
    pd.read_csv('survey_response_mc.csv')
    .reset_index()
    .rename(columns=column_renames)
)

# Load the title -> genre lookup table.
genre_df = pd.read_csv('title_to_genre.csv')

## Creating DataFrame

In [3]:
# create dataframe for multiple choice
# Columns carried into the clustering frame, grouped by question set.
mc_columns = [
    # respondent id and overall play history
    'user_id', 'timestamp', 'user_game_status', 'user_game_duration',

    # "_new" question set
    'game_title_new', 'game_os_new', 'game_version_new', 'game_intro_new',
    'game_transaction_new', 'game_fishchart_new',

    # unsuffixed question set
    'game_title', 'game_os', 'game_version', 'game_intro',
    'game_transaction', 'game_fishchart',

    # "_past" question set
    'game_title_past', 'game_stop_past',

    # never-played and other-game questions
    'user_notplay', 'user_notplay_othergame_status', 'user_notplay_othergame',
    'user_othergame_status', 'user_othergame',

    # demographics
    'age', 'gender', 'content', 'continent',
]

# Take an explicit copy: the cleaning cells below assign columns into `df`,
# and doing that on a bare column selection of `master_df` is the pattern
# pandas flags with SettingWithCopyWarning. With .copy() `df` owns its data
# and `master_df` keeps the untouched raw answers.
df = master_df[mc_columns].copy()

## Cleaning New Players Variables

In [4]:
# Clean user_game_duration: fold free-text answers into the '> 4 years' bucket.
# The column is rebuilt from master_df each time, so re-running the cell is safe.
duration_raw = master_df['user_game_duration']

# Free-text answers that are bucketed as '> 4 years'.
# NOTE(review): the last entry states no total duration - confirm the bucket.
over_four_years_answers = [
    '7 years',
    'at least around 7 years',
    "I play off and on, I've played various Gacha games over 4 years, but stopped playing for a while. Just came back to a couple within the past month.",
    '6 years',
    '10 years',
    '10 years (on and off)',
    'I often play a game for 1->2 months then rest for one month. Then repeat.',
]

df['user_game_duration'] = duration_raw.mask(
    duration_raw.isin(over_four_years_answers), '> 4 years'
)

In [5]:
# Clean game_title_new: collapse spelling variants onto one canonical title.
# Matching is exact (including trailing spaces); nothing is stripped here.
title_new = master_df['game_title_new']

# canonical title -> raw spellings seen in the answers
title_new_aliases = {
    'Disney: Twisted-Wonderland': [
        'Twisted Wonderland',
        'Disney Twisted Wonderland',
        'Twisted Wonderland ',
        'Disney Twisted Wonderland ',
    ],
    'Another Eden': ['Another Edeb'],
    'Awaken: Chaos Era': ['Awaken: Chaos era'],
}

for canonical, spellings in title_new_aliases.items():
    title_new = title_new.mask(title_new.isin(spellings), canonical)

df['game_title_new'] = title_new

In [6]:
# Clean game_os_new: map free-text platform answers onto the fixed categories.
os_new = master_df['game_os_new']

# category label -> raw answers folded into it (applied in this order)
# NOTE(review): recent pandas releases list 'None' among read_csv's default
# NA markers, so that answer may already be NaN by the time it reaches this
# cell - confirm against the loaded data.
os_new_groups = {
    'Both': ['Android and iOS(2 accounts)'],
    'Neither': ['None'],
    'Console (PS4, PS5)': ['Playstation 5', 'Nintendo Switch'],
    'PC (Browser, Steam)': ['PC'],
}

for label, answers in os_new_groups.items():
    os_new = os_new.mask(os_new.isin(answers), label)

df['game_os_new'] = os_new

In [7]:
# Clean game_version_new: fold free-text server/version answers into categories.
version_new = master_df['game_version_new']

# category label -> raw answers folded into it (applied in this order)
version_new_groups = {
    'Both': ['both global and Japanese'],
    'Global': ['English'],
    'Other': ['Korean'],
}

for label, answers in version_new_groups.items():
    version_new = version_new.mask(version_new.isin(answers), label)

df['game_version_new'] = version_new

In [8]:
# Clean game_intro_new: fold free-text "how did you find the game" answers
# into the closest fixed answer option.
intro_new = master_df['game_intro_new']

# answer option -> free-text answers folded into it (applied in this order)
intro_new_groups = {
    'Liked the official art or fan art/Creation': [
        'Online friend drew characters from it and posted it on discord. One character seemed interesting so i searched for it.',
        'Saw its merchandise',
    ],
    'Youtube/Twitch content creator showed me': [
        'VTuber stream',
        'Influencer',
    ],
}

for label, answers in intro_new_groups.items():
    intro_new = intro_new.mask(intro_new.isin(answers), label)

df['game_intro_new'] = intro_new

In [9]:
# Clean game_fishchart_new: anyone who answered 'No' to the in-app
# transaction question is labelled 'F2P'; all other rows keep their answer.
made_no_purchase = master_df['game_transaction_new'].eq('No')
df['game_fishchart_new'] = master_df['game_fishchart_new'].where(~made_no_purchase, 'F2P')

## Cleaning Current Longtime Players Variables

In [10]:
# Clean game_title: trim and title-case the free-text answer, then collapse
# known spellings onto one canonical title per game.
# Keys below are compared AFTER .str.title(), which is why they look like
# "Can'T" / "3Rd" / "Ffbe".
game_title = master_df['game_title'].str.strip().str.title()

# canonical title -> title-cased spellings folded into it (applied in order).
# Answers naming several games are assigned to one of the listed games.
game_title_aliases = {
    'AFK Arena': ['Afk Arena'],
    'Another Eden': [
        'Another Eden The Cat Beyond Time And Space',
        'Another Eden, The Cat Beyond Time And Space',
        'Another Eden: Global',
        'Another Eden: The Cat Beyond Time And Space',
    ],
    'BanG Dream! Girls Band Party!': [
        'Bang Dream',
        'Bang Dream! Girls Band Party!',
        'Bang Dream, But Also A Couple On This List A Like A Lot,',
    ],
    'Bleach: Brave Souls': ['Bleach Brave Souls'],
    'Bleach: Immortal Soul': ['Bleach: Immortal Souls'],
    'Arknights': [
        "Can'T Choose: Arknights, Azur Lane - Played Genshin A Ton, Kinda Stopped Now.",
    ],
    'Captain Tsubasa: Dream Team': ['Captain Tsubasa Dream Team'],
    'Cookie Run: Kingdom': [
        'Cookie Run Kingdom',
        'Cookie Run: Kingdoms',
    ],
    'Counter:Side': ['Counter: Side'],
    'Disney: Twisted-Wonderland': [
        'Disney Twisted Wonderland',
        'Disney Twisted-Wonderland',
        "Disney'S Twisted Wonderland",
        'Disney’S Twisted Wonderland',
        'Twisted Wonderland',
    ],
    'Dissidia Final Fantasy Opera Omnia': [
        'Dissidia',
        'Dissidia: Opera Omnia',
    ],
    'Ensemble Stars!!': [
        'Ensemble Stars',
        'Ensemble Stars (Music And Basic)',
    ],
    'Fate/Grand Order': ['Fate Grand Order'],
    'Final Fantasy Brave Exvius': [
        'Final Fanatsy Barve Exvius',
        'Swgoh Ffbe',
    ],
    'Final Fantasy Record Keeper': ['Final Fantasy: Record Keeper'],
    'Final Fantasy War of the Visions': [
        'Finalfantasy War Of The Vision',
        'Final Fantasy War Of The Visions',
    ],
    'Girls Frontline': ["Girls' Frontline"],
    'Guardian Tales': ['Guardian Tale'],
    'Honkai Impact 3rd': ['Honkai Impact 3Rd'],
    'The Seven Deadly Sins Grand Cross': [
        'I Play Both Seven Deadly Sins Grand Cross And Dragonball Legends, Not Dokkan',
    ],
    'The Idolm@ster Cinderella Girls: Starlight Stage': [
        'Idolmaster Cinderella Girls Starlight Stage',
    ],
    "King's Raid": ['Kings Raid'],
    'The King Of Fighters All Star': [
        'Kof All Stars',
        'Kof Allstars',
    ],
    'Langrisser Mobile': ['Langrisser'],
    'Lord Of Heroes': ['Lord Of Heroes And Figure Fantasy'],
    'Love Live! School Idol Festival': ['Love Live School Idol Festival'],
    'Promise of Wizard': ['Mahoyaku'],
    'Marvel: Future Fight': [
        'Marvel Future Fight',
        'Marvel Future Fight (Kinda Gacha)',
        'Future Fight',
    ],
    'NieR Re[in]carnation': ['Nier Reincarnation'],
    'Obey Me! One Master to Rule Them All': [
        'Obey Me',
        'Obey Me!',
    ],
    'One Piece Treasure Cruise': [
        'One Piece Tc',
        'One Piece Treasure Cruise (Bountry Rush Is Ok Too But Optc>Opbr)',
        'One Piece Tresure Cruise',
        'One Piece: Treasure Cruise',
        'Optc',
    ],
    'Pokemon Masters EX': ['Pokemon Masters Ex'],
    'Project Sekai: Colorful Stage! feat. Hatsune Miku': [
        'Project Sekai/Hatsune Miku: Colorful Stage',
        'Project Sekai: Colorful Stage! Feat. Hatsune Miku',
    ],
    'Punishing: Gray Raven': [
        'Pgr, Arknights, And Genshin',
        'Punishing Gray Raven',
        'Punishing Gray Raven, Action Taimanin',
    ],
    'Shoujo Kageki Revue Starlight: Re LIVE': ['Revue Starlight Re Live'],
    'Romancing SaGa Re;univerSe': [
        'Romancing Saga Reuniverse',
        'Romancing:Saga Re Universe',
    ],
    'Tears of Themis': ['Tears Of Themis'],
    'SLIME - ISEKAI Memories': [
        'That Time I Got Reincarnated As A Slime: Isekai Memories',
    ],
    'Tokyo Afterschool Summoners': ['Tokyo Afterschool Summoners (Lifewonders)'],
    'Touhou LostWord': ['Touhou Lostword'],
    'Uma Musume: Pretty Derby': ['ウマ娘 プリティーダービー'],

    # answers that name no game at all
    'Other': [
        'Hate Them All Equally But Also Love Them All Equally',
        "I Hate Them All, I'M Just Playing Because I Spent Too Much Time In Them",
    ],
}

for canonical, spellings in game_title_aliases.items():
    game_title = game_title.mask(game_title.isin(spellings), canonical)

df['game_title'] = game_title

In [11]:
# Clean game_os: trim and title-case the free-text platform answer, then
# fold it into one of the fixed platform categories.
# Keys below are compared AFTER .str.title() (hence 'Ios', 'Pc', "Don'T").
game_os = master_df['game_os'].str.strip().str.title()

# category label -> title-cased answers folded into it (applied in order)
game_os_groups = {
    'Android': [
        'Android & Bluestacks',
        'Android (Other Gacha) And Ps4 For Genshin',
        'Android And Pc',
        'Android,Pc',
        'Pc, Would Be Android If Only Mobile Allowed',
    ],
    'iOS': [
        'Ios',
        'Ios & Windows Pc',
        'Ios And Pc',
        'Ios With An Emulator For Pc',
        'Ios, Mac',
        'Ipad',
    ],
    'Both': [
        "I'Ve Used Both.",
        'Ios And Android',
        'Ios As Well As Windows 10 And Android',
        'Use Both Ios And Android',
    ],
    'PC (Android emulation: Bluestacks, Nox)': [
        'Android Emulation On Pc',
        'Android Emulator (Pc) & Mobile',
        'Android Emulator',
        'Bluestacks',
        'Bluestacks Or Pc Client',
        'Emulator',
        'Emulator (Noxplayer)',
        'Emulator Bluestacks And Android',
        'Emulator On Windows',
        'Emulators',
        'Bluestacks On Pc',
        'Laptop With Android Emulator',
        "Pc, Using An Emulator (Bluestacks). However, It'S Emulating An Android.",
    ],
    'Console (PS4, PS5)': [
        'Ps4',
        'Ps4 For Genshin Impact',
        'Ps4 Pro',
        'Ps4/Ps5, Pc (Windows)',
    ],
    'PC (Browser, Steam)': [
        'Android And Pc (Primarily, The Latter)',
        'Browser',
        'Computah',
        'Computer',
        'Computer Browser',
        'Dmm Launcher For Computer',
        "Don'T Play On Mobile",
        'Don’T Play On Mobile, Pc',
        'I Play It On Pc',
        'I Play It On Pc Actually',
        'I Play On Pc',
        'I Use Pc',
        "I Don'T Play It On A Mobile Device, I Play On Pc With It'S Steam Version",
        'Pc',
        'Pc (& Ios)',
        'Pc (Windows)',
        'Pc And Android',
        'Pc Browser',
        'Pc Master Race',
        'Pc Only',
        'Pc Only. Not Gonna Play Mobile',
        'Pc Use To Play On Android',
        'Pc Windows',
        'Pc, Android',
        'Pc, Not Mobile',
        'Pc, Why Is It Not An Option You Dumbfuck...',
        "Pc. I Don'T Download Gatcha Games On Mobile To Limit Playtime.",
        'Steam On Pc',
        'Steam/Pc Client',
        'Windows',
        'Windows 10 (Pc)',
        'Windows. Fuck Smartphones',
        'Mac; Any Browser Works. Google, Brave, Safari, Opera Gx Etc.',
    ],
}

for label, answers in game_os_groups.items():
    game_os = game_os.mask(game_os.isin(answers), label)

df['game_os'] = game_os

In [12]:
# Clean game_version: trim and title-case the free-text server/version
# answer, then fold it into 'Global', 'Japanese', 'Both' or 'Other'.
# Keys below are compared AFTER .str.title() (hence 'Jp', "There'S").
game_version = master_df['game_version'].str.strip().str.title()

# category label -> title-cased answers folded into it (applied in order)
game_version_groups = {
    'Global': [
        'American?',
        'En',
        'Eu',
        'Europe',
        'European',
        'Euroup (Europe)',
        'North America',
        'Server Is Merged Since End Of 2021, But Before It Was Global',
        "There'S Only One Server",
        'Us',
        'Us Version',
    ],
    'Japanese': [
        'Both But Mainly Jp',
        "It'S A Japanese Game That Is Fully Translated To English With The Option To Switch Back And Forth Freely.",
        'Japanese And Chinese',
    ],
    'Both': [
        'Both (And Also Other Versions If Available)',
        'Both Global And Japanese',
        'Both Global/Jp',
        'Both, Along With Taiwan Server, And Hopefully Korea Server Once It Comes Online',
        'Both, One Of Them Is Granblue Fantasy, Which Only Have Jp Server But Have English Language Option',
        'Global + Japanese',
        "I'Ve Played Both For Various Games",
        # Previously bucketed as 'Global'. The answer names both servers, so
        # it now sits with the other Global + Japanese answers.
        # NOTE(review): confirm this matches the intended coding.
        'Jp And Global Servers',
    ],
    'Other': [
        'Chinese',
        'Cn Server (For The Newest Events)',
        'Korean',
        'Sar',
        'Sea',
        'Taiwanese Server',
    ],
}

for label, answers in game_version_groups.items():
    game_version = game_version.mask(game_version.isin(answers), label)

df['game_version'] = game_version

In [13]:
#cleaning game_intro variable
# Copy the raw answers, trim surrounding whitespace and Title-Case them so
# that differently-cased write-ins compare equal. Note that .str.title()
# also capitalises the letter after an apostrophe, which is why the variants
# below are spelled "Don'T", "I'Ve", "Game'S", etc.
df['game_intro'] = master_df['game_intro']
df['game_intro'] = df['game_intro'].str.strip().str.title()

# Ordered (category label, [write-in variants]) table. Each write-in is
# collapsed into its category label; rows that match no variant (including
# missing values) are left untouched.
intro_categories = [
    ('Youtube/Twitch content creator showed me', [
        'A Content Creator Showed It To Me On Twitch',
        'A Streamer I Watch Started Playing It (Twitchtv)',
        'Followed A Youtuber That Started Playing The Game',
        'Gacha Game Youtubers',
        "Gigguk'S Video",
        'Nijisanji Sponsored Stream',
        'Through Yugioh Streamers',
        'Twitch',
        'Twitch Streamer',
        'Twitch Streamers',
        'Youtube',
        'Youtube And Pixiv',
        'Youtube Comment',
        'Youtube Content Creators',
        'Youtube Video In My Recommended',
        'Youtube Videos',
        'Youtuber Advice',
        'Youtuber I Knew Played It',
        'Youtube Video Recommendation',
        'Saw A Streamer I Liked Playing It',
        'Youtuber Promoted On Their Channel',
        'Saw Twitch Streamers Playing It',
        'Saw Youtube Footage Of The Game',
        'Sponsored Youtube Video',
        'Streamer Played It, Liked The Gameplay',
        'Saw Gameplay On Youtube And Pre-Registered / Played On Launch',
    ]),
    ('Knew or saw the anime', [
        'Anime',
        'Anime Adaptation',
        'Anime Released A Gacha Game',
        'Anime When I Was Young',
        'Found It By Watching The Anime',
        'Known The Franchise Because Of The Anime',
        "Searched It Myself After Watching One Piece. I Do This For All The Anime I Watch, First Watch The Anime Then Search For It'S Mobile Game If They Have It",
        'Through The Anime',
        'Watch One Piece Anime',
        'Watched The Anime, Got Interested And Went Searching For Games Related To It',
        'Watched The Anime Ver. Of It',
    ]),
    ('Fan of the franchise', [
        'Fan Of The Franchise',
        'I Follow Yugioh! Closely And Am Part Of The Community',
        'I Play Fire Emblem',
        'I Searched For One Piece Games Bc I Liked One Piece',
        'I Watch The Fate/ Series',
        "I'Ve Been A Long Time Fan Of The Series. I Discovered That There Was An Announcement Of The Ip To Make A Gacha And Was Immediately On-Board.",
        "Liked The Series And Was Curious If There'S A Game, If Not More Based On It.",
        'Long Time Fe Fan',
        "Memes And I'M An Already Established Fate Fan",
    ]),
    ('Searched franchise in app store', [
        'Searched One Piece In App Store',
        'Wanted A One Piece Game',
        'Searched For A One Piece Game',
        'Searched For It Because Of Op Anime.',
        'Looking For A Dbz Game On Mobile',
        'Searched For Marvel Games In The App Store',
        'Kept Up With Game Since Day 1',
        'Looked For A One Piece Game In The App Store',
    ]),
    ('Found through app store recommendation', [
        'App Store',
        'Appeared In The App Store And I Liked The Frog.',
        'Found Through App Store Recommendation',
        'Found Random In App Store',
        'I Normally Look For Pre Register Games On The App Store So I Can Play Current Games',
    ]),
    ('Played previous games in the franchise/Played beta', [
        'Participate To The Beta',
        'Played Console Counterpart',
        'Played Other Ganes In The Series',
        'Played The Beta',
    ]),
    ('Word of mouth from someone I know', [
        'Friend Of Mine Installed It To My Phone',
        'Friends',
        'From Friend And App Store Recommendation',
        'My Friend Harassed Me Till I Played It',
        'Word Of Mouth From Someone I Know',
        'Watched The World Championships With My Friend',
    ]),
    ('Saw on social media (Reddit, Twitter, Etc.)', [
        'Discord Bot',
        'Gachagaming Subreddit',
        'I Saw It On Reddit Before It Was Released On The Seven Deadly Sins Subreddit And I Downloaded It Day Okie Of Release.',
        'Pinterest',
        'Reddit',
        "Reddit And Twitter Before The Game Released But Playing Seriously Since July After Looking At Their Youtube Channel And The Old Event, Actually I Picked The Game Again Because Of Kal'Tsit Va",
        'Saw A Post From An User On Twitter',
        'Social Media And Youtube Videos',
        'Twitter Post',
    ]),
    ('Company announcement', [
        'Was There When Nintendo First Announced Feh.',
        'Watched The 2017 Nintendo Direct.',
        'Been Following Hoyoverse Since Honkai Came Out In Global So I Heard About It Directly From Them',
        'Saw The Jp Release',
    ]),
    ('Introduced by other games', [
        'Gbvs Mas Announced',
        'Learned Of It From A Pg3D Free Gem Thing',
        'Through A Crossover With A Franchise I Like',
        'Through Genshin Impact',
        'I Was Looking For More Mihoyo Games After Playing Genshin, And Fell In Love With The Game And Fu Hua.',
    ]),
    ('Liked the music', [
        "Found By Accident When Searching For Music By One Of The Game'S Composers (Yasunori Mitsuda)",
        'Heard Its Soundtrack On Youtube',
        'I Found One Of The Songs On Youtube And Said “Why The Fuck Not Lets Play This”',
        'Saw A Song Preview On Youtube',
    ]),
    ('Liked the official art or fan art/Creation', [
        'Doujin',
        'Doujins',
        'Fan Art',
        'Fanart',
        'Fanart Of The Characters',
        'Found Fanarts Of The Game On Danbooru',
        'Found Cute Online Images, So Decided To Download Game.',
        'Saw A Gif If The S3 Animations And Wanted To Check It Out',
        'Saw A Speed Paint Of A Character From That Game',
        'Saw Some Art From The Game And Did Some Research.',
        'The Doujins',
        'When I Saw A Really Hot Ship Girl',
        'My Waifu',
    ]),
    # The next two only undo the Title-Casing of existing answer options.
    ('Read about it online', [
        'Read About It Online',
    ]),
    ('Saw an advertisement', [
        'Saw An Advertisement',
    ]),
    ('Do not recall', [
        "Don'T Remember, Sorry",
        "I Don'T Remember",
        'No Longer Remember, It Was 6 Years Ago',
        # empty answers; the original compared against '' three times over,
        # once is enough
        '',
    ]),
    ('Other', [
        "Already Knew It Existed But Didn'T Get Interested At The Beginning",
        'Cbt',
        'I Saw Someone Playing It Over Their Shoulder',
        'Sandy Cult',
        'Searched For It On My Own',
        'Found It Myself',
    ]),
]

# Apply the categories one after another, in table order.
for intro_label, intro_variants in intro_categories:
    # isin() replaces the long chain of (col == 'a') | (col == 'b') | ...;
    # like ==, it is False for missing values.
    is_variant = df['game_intro'].isin(intro_variants)
    df['game_intro'] = df['game_intro'].mask(is_variant, intro_label)

In [14]:
#cleaning game_fishchart variable
# Copy the raw answers, trim whitespace and Title-Case them. .str.title()
# capitalises the letter after an apostrophe or digit, hence spellings such
# as "Don'T" and "5Usd" in the variants below.
df['game_fishchart'] = master_df['game_fishchart']
df['game_fishchart'] = df['game_fishchart'].str.strip().str.title()

# Write-ins that describe not spending on this game -> 'F2P'.
fishchart_no_spend = [
    'I Have Not Spent Money On This Game But Another',
    'None',
    'On The Game I Chose For My Favourite I Spend Zero, But On A Other Game I Spend 5Usd.',
]
df['game_fishchart'] = df['game_fishchart'].mask(
    df['game_fishchart'].isin(fishchart_no_spend), 'F2P')

# Any row whose raw game_transaction answer in master_df is 'No' is forced
# to 'F2P', whatever was entered for this question.
df['game_fishchart'] = df['game_fishchart'].mask(
    master_df['game_transaction'] == 'No', 'F2P')

# Remaining buckets, applied one after another in the order listed. The
# last three entries turn the Title-Cased spender labels (both the original
# answer options and the write-ins bucketed just above) into their
# sentence-case form, so they must stay at the end.
fishchart_buckets = [
    # store-credit-only spenders and empty answers
    ('F2P', [
        'I Only Spend My Google Play Cash That I Get From Doing Google Surveys. I Do Not Spend Real Money.',
        'Only Spend Money Earned By Doing Google Rewards, Etc.',
        '',
    ]),
    # mostly one-off or rare purchases
    ('Minnow (I Spend A Little Bit)', [
        'I Actually Spent More Money On Genshin Lol, But I Only Paid For Guaranteed Banners In Fgo',
        "I Don'T Spend Money Unless It'S A Guaranteed New Character And It'S Less Than $5.",
        "I Don'T Spend. I Cannot. Like, Cannot Cannot. Only Once, 6$ Or So On Arknights.",
        'I Only Bougth Once In All My Time',
        'I Only Spent A Huge Amount (500 Cad) Once And Have Never Spent Money Since Then. It Was To Get A Unit I Grinded To Get For Weeks But Could Not Get. So, I Gave In To Desire. Ever Since Then  I Have Never Spent Any Money.',
        'I Only Spent Once 8€',
        'I Spent 1 Time A Little Bit Because The Value Was Right Only For That One Purchase And The Amount Coincided With What I Thought The Developers Deserved.',
        'I Spent 20€. Not Making That Mistake Again.',
        'I Spent Once Twice',
        'Only Sde (Select Characters, You Can Pick One Characters)',
        'Only Spent 70€ One Time, Never Again',
        'Only Spent Once',
        "Tough To Tell Due To Spending Only Happens From Time To Time And I Only Purchase Suprprise Tickets That Comes Out From Time To Time. And Even Then, I Don'T Purchase Every Single Surprise Tickets Out There.",
    ]),
    # small or occasional amounts
    ('Minnow (I Spend A Little Bit)', [
        '<100 Usd Total Within ~3 Years, Probably Minnow Or Below',
        'About 10 Bucks A Month',
        'Basically None, Will Go And Tell The Rest Below.',
        "I Don'T Know What These Terms Mean. I Spend $5 Per Month",
        'I Have Spent Some Money On The Game But Not Consitantlly, Only On Birthdays And Holidays Like Christmas.',
        "I Only Spend Money On This Game When There'S A Cheap Value Pack (Roughly $5 Usd), Which Is Only Once Every Few Years.",
        'I Spent A Total Of About 20 Dollars Since I Started The Game A Couple Of Years Ago',
        'If I Buy At All It’S During An Anniversary, And Even Then I Spent Very Little',
        'Ive Spent About $20 On The Game In 3 Years Of Playing, So I Wouldnt Classify Myself As Any Of These To Be Honest',
        'Little Or No Money Spend',
        'Mostly Minnow With Occasional Splurges',
        'Not Consistently, Only Occasionally Small Amounts.',
        'Only Spend With Limited Characters In Arknights And In Genshin Only Pay The Monthly Gems',
        'Originally Niap (1+ Year); "Grew" From Minnow To An Occasional Whale; Finally Gave Up Whale Status And Reverted Back To Minnow (~$10/Month)',
        'Pretty Much Free To Play, I Have Spent $20 In The Past Three Years.',
        'Sometimes F2P, Sometimes Minnow, Sometimes Dolphin. For This Game, Minnow',
        'You Should Have Included Money Brackets Instead Of These Meaningless Labels, What Is A Whale To Me Might Not Necessary Be The Same For Other People. Anyway, I Spent Around 30€ In The 3 Years I Played Al.',
    ]),
    ('Dolphin (I Spend A Moderate Amount)', [
        '50-150€ By Month',
        "About 70 A Month Unless There'S A Cosmetic Package. +100 For The Package.",
        'Between Minnow And Dolphin',
        'Depends On The Game But Sometimes Im A Minnow, Sometimes A Dolphin',
        "It Depends; I Used To Be Able To Spend $35+ A Month When I Was Really Enjoying The Game, But Recently I'Ve Moved Into Low Spending Dolphin/Minnow (But I Bought A Discount 'Monthly Boost' For The Year A While Back And Haven'T Really Spent Any Funds Recently)",
        'Somewhere Between Minnow And Dolphin. I Have Spent Money But Not A Great Amount And Over A Long Period Of Time',
        'Used To Be A Minnow, I Just Recently Entered Into A Subscription So A Dolphin Now',
        'Was A Dolphin Now Ftp',
    ]),
    ('Whale (I Spend A Lot)', [
        'Ive Spent Around 9Grand And Ive Been Playing Game Since Launch(7Years)So Not Sure What To Classify That As',
        "People Have Called Me Space Or Galactic Whale In All The Games I'Ve Played.",
        "Pseudo-Whale. I Have Every Character, But I Don'T Have Every Character Maxed. Makes A Huge Difference In End Game Content As Well As Skill Gap Issues.",
        'I Was A Whale And Then I Stopped',
        'Used To Be A Whale, Now No Longer Spend Money',
        'Used To Spend But Stopped',
        "What'S Higher Than Whale? Kraken? Yeah Sounds Good I'Ll Go With That.",
    ]),
    # Title Case -> sentence case for the three spender labels
    ('Dolphin (I spend a moderate amount)', [
        'Dolphin (I Spend A Moderate Amount)',
    ]),
    ('Minnow (I spend a little bit)', [
        'Minnow (I Spend A Little Bit)',
    ]),
    ('Whale (I spend a lot)', [
        'Whale (I Spend A Lot)',
    ]),
]
for fishchart_label, fishchart_variants in fishchart_buckets:
    # isin() is False for missing values, just like an == comparison
    is_variant = df['game_fishchart'].isin(fishchart_variants)
    df['game_fishchart'] = df['game_fishchart'].mask(is_variant, fishchart_label)

## Cleaning Past Game Variables

In [15]:
#cleaning game_title_past variable
# Copy the raw answers, trim whitespace and Title-Case them. .str.title()
# capitalises the letter after an apostrophe or digit, hence spellings such
# as "King'S Raid" and "Honkai Impact 3Rd" among the aliases below.
df['game_title_past'] = master_df['game_title_past']
df['game_title_past'] = df['game_title_past'].str.strip().str.title()

# Ordered (canonical title, [aliases as written by respondents]) pairs.
# Kept as a list rather than a dict because a canonical title may appear
# more than once and the pairs are applied strictly in this order.
past_title_aliases = [
    ('Digimon ReArise', ['A Digimon Game. I Forgot The Name',
                         'Digimon Rearise',
                         'Digimon: Rearise']),
    ('Genshin Impact', ["A Lot Of Them, But For The Sake Of This Question, Let'S Say Genshin"]),
    ('Act! Addict! Actors!', ['A3!',
                              'A3, Act Addict Actors']),
    ('AFK Arena', ['Afk Arena']),
    ('Age of Ishtaria', ['Age Of Ishtaria']),
    ('The Alchemist Code', ['Alchemist’S Code',
                            'The Alchemists Code']),
    ('Alchemy Stars', ['Alchemy Star']),
    ('Love Live! School Idol Festival', ['All Love Live! Games']),
    ('Animal Crossing: Pocket Camp', ['Animal Crossing']),
    ('BanG Dream! Girls Band Party!', ['Bang Dream',
                                       'Bang Dream!',
                                       'Bang Dream! Girls Band Party!']),
    ('The Battle Cats', ['Battle Cats']),
    ('BlazBlue Alternative: Dark War', ['Blaz Blue']),
    ('Bleach: Brave Souls', ['Bleach Brave Souls']),
    ('Brave Frontier', ['Brave Frontier Rpg',
                        'Brave Frontier Rpg (Eu Server)',
                        'Brave Frontier, Valiant Force']),
    ('BTS World', ['Bts World']),
    ('Cookie Run: Kingdom', ['Cookie Run Kingdom']),
    ('Counter:Side', ['Counter Side',
                      'Counter:Side And Genshin']),
    ('Crusaders Quest', ["Crusader'S Quest"]),
    ('Final Fantasy Brave Exvius', ["Damn Dude, You Don'T Even Have It, Shows How Bad It Is. Final Fantasy Brave Exvius",
                                    'Ff Brave Exvius',
                                    'Ffbe',
                                    'Ffbe (Final Fantasy Brave Exvius)',
                                    'Final Fantasy Brave Exvius (The Original, Not Wotv)']),
    ('Defender of Texel', ['Defenders Of Texile']),
    ('Crossing Void', ['Dengeki Bunko Crossing Void']),
    ('Dissidia Final Fantasy Opera Omnia', ['Dissidia Final Fantasy: Opera Omnia',
                                            'Dissidia Final Fantasy Opera Onnia']),
    ('Dragon Ball Z Dokkan Battle', ['Dokkan Battle Global']),
    ('Dragon Ball Legends', ['Dragon Ball Legend']),
    ('Dragon Mania Legends', ['Dragon Mania']),
    ('Dragon Quest of the Stars', ['Dragon Quest Stars']),
    ('Final Fantasy Record Keeper', ['Ffrk',
                                     'Record Keeper']),
    ('Granblue Fantasy', ['Gbf',
                          'Grand Blue Fantasy']),
    ('Grand Chase', ['Grand Chase Gacha']),
    ('MHA: The Strongest Hero', ['Hero']),
    ('Honkai Impact 3rd', ['Honkai Impact 3Rd']),
    ('Monster Super League', ['I Forgot Its Name, Super Monster Or Something. I Was A Game On Microsoft Store']),
    ('Do Not Recall', ['I Forgot The Name']),
    ('I-Chu', ['I-Chu, I-Chu Etoile Stage, Yumeiro Cast']),
    ('Identity V', ['Identity V; Tears Of Themis']),
    ('SLIME - ISEKAI Memories', ['Isekai Memorys',
                                 'Slime - Isekai Memories']),
    ("JoJo's Bizarre Adventures: Diamond Records", ["Jojo'S Bizarre Adventures: Diamond Records"]),
    ('Kantai Collection', ['Kancolle',
                           'Kantai Collection.']),
    ('King of Prism: Prism Rush! Live', ['King Of Prism -Prism Rush Live-']),
    ("King's Raid", ["King'S Raid",
                     'Kings Raid']),
    ('Kingdom Hearts Union Cross X', ['Kingdom Hearts Union Χ [Cross]',
                                      'Kingdom Hearts Ux']),
    ('KonoSuba: Fantastic Days', ['Konosuba',
                                  'Konosuba: Fantastic Days']),
    ('Langrisser Mobile', ['Langrisser']),
    ('Legendary: Game Of Heroes', ['Legendary Game Of Heroes']),
    ('Love Live! All Stars', ['Love Live All Stars']),
    ('Love Live! School Idol Festival', ['Love Live School Idol Festival',
                                         'Love Live Sif',
                                         'Love Live! School Idol Festival!',
                                         'Love Live: School Idol Festival',
                                         'School Idol Festival (Sif)']),
]
for past_title, past_aliases in past_title_aliases:
    # isin() is False for missing values, just like an == comparison
    is_alias = df['game_title_past'].isin(past_aliases)
    df['game_title_past'] = df['game_title_past'].mask(is_alias, past_title)
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Love Nikki') |
                                                   (df['game_title_past'] ==  'Love Nikki-Dress Up Queen'), 
                                                   'Love Nikki-Dress UP Queen')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Marvel Contest Of Champions And Final Fantasy: Brave Exvius'), 
                                                   'Marvel Contest Of Champions')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Marvel Future Fight') |
                                                   (df['game_title_past'] ==  'Mff'), 
                                                   'Marvel: Future Fight')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Memorial Freese'), 
                                                   'Danmachi Memorial Freese')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Mha: The Strongest Hero'), 
                                                   'MHA: The Strongest Hero')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Naruto Blazing') |
                                                   (df['game_title_past'] ==  'Naruto Shippuden Ultimate Ninja Blazing') |
                                                   (df['game_title_past'] ==  'Naruto Ultimate Ninja Blazing'), 
                                                   'Naruto Shippuden: Ultimate Ninja Blazing')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Nier') |
                                                   (df['game_title_past'] ==  'Nier Reincarnation'), 
                                                   'NieR Re[in]carnation')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Nightzero Mistiltein'), 
                                                   'NightZero:Mistiltein')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Obey Me'), 
                                                   'Obey Me! One Master to Rule Them All')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Omg How Girls Frontline Its Not In The List And Yeah This Is The Game That I Stop Playing'), 
                                                   'Girls Frontline')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'One Piece Treasure Cruise (After Years I Decided Play Again)') |
                                                   (df['game_title_past'] ==  'One Piece Treasure Cruise, Dbz Dokkan Battle, Db Legends') |
                                                   (df['game_title_past'] ==  'One Piece Treasure Curise') |
                                                   (df['game_title_past'] ==  'One Piece: Treasure Cruise') |
                                                   (df['game_title_past'] == 'Why Isnt One Piece Treasure Cruise On The List? Lel'), 
                                                   'One Piece Treasure Cruise')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Otogi Spirit Agents'), 
                                                   'Otogi: Spirit Agents')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Phantasy Star'), 
                                                   'Idola Phantasy Star Saga')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Pokemon Dual'), 
                                                   'Pokemon Duel')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Pokemon Masters Ex'), 
                                                   'Pokemon Masters EX')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Puni Puni') |
                                                   (df['game_title_past'] ==  'Yo-Kai Watch Wibble Wobble') |
                                                   (df['game_title_past'] ==  'Yo Kai Watch Puni Puni'), 
                                                   'Yo-kai Watch: Wibble Wobble')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Punishing Grey Raven'), 
                                                   'Punishing: Grey Raven')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Puzzle And Dragons Then Fire Emblem'), 
                                                   'Puzzle & Dragons')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Raid Shadow Legends'), 
                                                   'Raid: Shadow Legends')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Romancing Saga Re:Universe') |
                                                   (df['game_title_past'] ==  'Romancing Saga Re;Universe'), 
                                                   'Romancing SaGa Re;univerSe')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Saint Seiya Awakening'), 
                                                   'Saint Seiya Awakening: Knights of the Zodiac')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Sao Md') |
                                                   (df['game_title_past'] ==  'Sao: Memory Defrag') |
                                                   (df['game_title_past'] == 'Sword Art Online Memory Defrag'), 
                                                   'Sword Art Online: Memory Defrag')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Shin Megami Tensei Dx2') |
                                                   (df['game_title_past'] ==  'Shin Megami Tensei: Dx2') |
                                                   (df['game_title_past'] ==  'Smt Dx2'), 
                                                   'Dx2 Shin Megami Tensei: Liberation')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Soccer Spririt'), 
                                                   'Soccer Spririts')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Star Girl Fashion Cocoppa Play'), 
                                                   'Star Girl Fashion CocoPPa Play')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] == "Summoner'S Board"), 
                                                   'Summons Board')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Tokyo Ghoul Darkwar'), 
                                                   'Tokyo Ghoul: Dark War')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'Touken Ranbu (Idk If This Counts As A Gacha) / Idolish7'), 
                                                   'Touken Ranbu')
df['game_title_past'] = df['game_title_past'].mask((df['game_title_past'] ==  'グリモアA～私立グリモワール魔法学園～'), 
                                                   'Grimoire Magical Academy')

## Cleaning Play Other Games Variables

In [16]:
#cleaning user_notplay variable
# Copy the raw answers, trimmed and title-cased in one pass.
df['user_notplay'] = master_df['user_notplay'].str.strip().str.title()
# Fold the one free-form answer into the matching listed option; every other
# value (including missing ones) is kept unchanged.
df['user_notplay'] = df['user_notplay'].where(
    df['user_notplay'] != 'I Will Probably Be Addicted And Over Spending',
    'I Do Not Want To Encourage Gambling Behavior.')

In [17]:
# cleaning user_othergame variable
# Each answer is a ', '-separated string. Free-form answers are first folded into
# the three recognised categories, then every answer is turned into a list that
# contains only those categories (missing answers become an empty list).
OTHERGAME_CHOICES = ['Other mobile games', 'Console games', 'PC games']


def _keep_othergame_choices(picks):
    """Return the recognised categories found in `picks`.

    `picks` is one element of the split column: a list of strings, or a
    missing value (NaN) when the respondent gave no answer. Missing values
    yield an empty list.
    """
    if not isinstance(picks, list):
        return []
    return [choice for choice in picks if choice in OTHERGAME_CHOICES]


df['user_othergame'] = master_df['user_othergame']
df['user_othergame'] = df['user_othergame'].str.strip()

df['user_othergame'] = df['user_othergame'].mask((df['user_othergame'] == 'Browser Games') |
                                                 (df['user_othergame'] == 'Visual Novel Games'),
                                                 'PC games')
df['user_othergame'] = df['user_othergame'].mask(df['user_othergame'] == "All Fromsoft games that isn't demon souls (and more ARPG's). Mobas. Fps. Fighting games. Strategy games. Rouge-likes. Open worlds.",
                                                 'Console games')
df['user_othergame'] = df['user_othergame'].mask((df['user_othergame'] == 'All of the above') |
                                                 (df['user_othergame'] == 'PC, Mobile and Console (from sega saturn to Ps3)') |
                                                 (df['user_othergame'] == 'all of the above, at least once'),
                                                 'Other mobile games, Console games, PC games')

# Split and filter in one whole-column assignment. The previous element-wise
# writes (df[col][i] = ..., df[col].loc[mask] = ...) were chained assignments,
# which do not update `df` under pandas Copy-on-Write and required a 0..n-1 index.
df['user_othergame'] = df['user_othergame'].str.split(', ').apply(_keep_othergame_choices)

## Cleaning Demographic Questions

In [18]:
#cleaning age variable
# Free-form age answers -> the age bracket they are assigned to.
# Applied in the order listed.
age_bracket_fixes = {
    'above 31': '31 and above',
    "I'm 17": 'Under 18',
    "17 but will turn 18 in less than 4 weeks.": 'Under 18',
    "Turning 31 in a month ": '25-30',
    "31 exactly": '31 and above',
    "40": '31 and above',
    "16": 'Under 18',
}
for raw_age, age_bracket in age_bracket_fixes.items():
    df['age'] = df['age'].mask(df['age'] == raw_age, age_bracket)

In [19]:
#cleaning gender variable
# One free-form answer is mapped to 'Male' explicitly.
df['gender'] = df['gender'].where(
    df['gender'] != 'Gender is the least of my concern, but technically male',
    'Male')

# Any remaining non-missing answer outside the three kept values is grouped
# under 'Non-binary'; missing answers stay missing.
kept_gender_values = ['Female', 'Male', "I'd rather not say."]
is_other_gender = df['gender'].notna() & ~df['gender'].isin(kept_gender_values)
df['gender'] = df['gender'].mask(is_other_gender, 'Non-binary')

In [20]:
#cleaning content variable
# Category label -> free-form answers that are recoded to it. Groups are applied
# one after another in the order listed, each against the column as left by the
# previous group. Answers must match exactly (case and trailing spaces included).
content_groups = [
    ('Anime', [
        "This question is weird and I don't understand if it's meant to be asking something in relation to the specific game (Puzzle and Dragons). If so, I guess anime, maybe? I do read some manga too, but I mean, what is this question even?",
        "Don't understand the question, but of the 4 previous given answers I enjoy anime the most.",
        "I'm quite casual with anime related things since 5-6 years ago. ",
        'some anime/VN/original IP/live action',
    ]),
    ('Manga', [
        "I don't really read manga or watch anime, but I do like Junji Ito so ig manga",
        'Video games?? If I misread the question then Manga. ',
        'Depends on what content you mean. But I guess I could go with Manga and/or books',
        'Most of my time gets spent working but if I get some free time its mostly Manga and Games, with some anime on the side if its good',
    ]),
    ('Manwha/Manhua', [
        "Manhwa (I don't know if this should count under manga, but I'm separating it just in case)",
        'manwha',
    ]),
    ('Webtoon/Webcomic', [
        'webtoons',
        'Webtoons',
        'Webcomic',
        'Webtoon',
    ]),
    ('Web Novel', [
        'web novels',
        'Web Novels',
        "I don't understand the question. I primarily read web novels.",
    ]),
    ('Video game', [
        'Video games and content about those',
        'Gaming',
        'Games',
        'Video games',
        'Video Games',
        'game',
        'Western games',
        'games',
        "rpg/tactics (I don't think I understand the question, but I don't watch TV or read much if that's what you mean)",
        'Other video games (mostly RPGs)',
        'RPGs',
        'JRPG',
        'Content could mean many things. At best I could answer this between video games, novels, and movies',
        'video games ',
        'Not sure i understand the question, but: video games, manga, vtubers',
        'none of the above, I play 2d games like Dead Cells, Monster Sanctuary, etc',
        "As in,japanese content? video games, and forums/socials/wikies where people talk/synthetize about the ones you listed, haven't really watched them in years",
        'Games and music',
        'Gaming (Action JRPGs)',
    ]),
    ('TV/Movie/Streaming Service', [
        'TV series',
        'Movies, TV, and Games',
        'TV, movies',
        'Hulu',
        'TV/Movies',
        "I don't really read often, mostly stream shows.",
        'Movies, and TV shows',
        'Movies, novels ',
        'Movies and series',
        # NOTE(review): the empty string is carried over from the original
        # conditions; it is unclear which answer it was meant to capture -- confirm.
        '',
        'Movies/TV',
        'Movies, comics',
        'Netflix ',
        'TV Shows',
        'Action movies ',
        'Movie ',
        'Regular TV shows',
    ]),
    ('Book/Newspaper', [
        'Newspapers, I guess',
        'Novel',
        'Non-fiction books',
        ' Literature',
        'Books, games',
        'Books (fiction), not anime related (although I do love anime & manga)',
        'Normal books, mostly history/general nonfiction ',
        'Books',
        'News non-fiction',
        'This question is unclear, what do you mean "content source", for media consumption, I mostly read books both facta and fiction',
        'Books, mainly fantasy novels.',
        'novels',
        "I don't understand. I read novels, not mangas.",
        'Literature',
        'Novels, mainly. I love to read and write.',
        'English novels',
        'Books and Online Texts',
        'Novels',
        'Book (the real ones)',
        'Heavy novel',
        'I don\'t consume those, not anymore. I read a lot of books, but they\'re mostly the so-called "literary genre", classics of lit., academic stuff,  non-fiction... I sometimes read comics and some of them are manga, but it\'s usually, again, le "artsy" stuff,  and it\'s a two-three books a year, top, so hardly a primary content source',
        'Regular books',
        'Nonfiction books',
    ]),
    ('Internet', [
        'Torrents',
        'Web',
        'PC?',
    ]),
    ('YouTube/Online Video', [
        'YouTube creators ',
        'Youtube???',
        'Youtube',
        'youtube',
        'YouTube ',
        'YT vids',
        'Youtube videos',
        'Random stuff on Youtube',
        "Youtube? I don't really watch anime and related stuff",
        'youtube lol',
        'Online videos',
        'Online Videoes',
        'Youtube/Twitch',
        'Youtube / Anime',
        'Youtube, mostly videos, somtimes streams',
        'Videos and games',
        'Not really any of the above, more yt videos or fanfics/fancontent',
        'non-anime animated videos',
        'YouTube',
    ]),
    ('Western media', [
        'I watch anime some but most media I consume is more western novels.',
        'The only anime I’ve seen is Naruto so I want to say none of the above because I watch American sitcoms.',
        'comic books and movies',
        'American comics(DC)',
    ]),
    ('Twitch/Livestreaming', [
        'TwitchTV?',
        'Streaming',
        'Streams',
        'Livestreams',
    ]),
    ('Social Media', [
        'Reddit',
        'Social Media..?',
        'Social media',
        'Social Media - YT , IG , TWT',
    ]),
    ('Podcast', [
        'Podcasts',
        "Podcasts? I don't really understand this question.",
    ]),
    ('VTuber', [
        'Vtubers',
        'Vtuber',
        'Vtubers ',
    ]),
    ('All of the above', [
        'everything above ',
        'All of the above, although visual novels not so much.',
        'all of the above',
        'All of the above equally',
        'All',
        'All of the above and more',
        'ALL',
        'All of above',
        'All of them, if it have a great story and good art',
        'A mix of all the above',
        'Kind of all of these equally',
        'Hent... i mean all the rest',
        'Everything of the above ?',
        'All of the above fairly equally.',
        'I try a little of everything',
    ]),
    ('Combination of the above', [
        'Anime and manga',
        'Anime + Manga',
        'both manga and anime',
        'Anime, Manga, Light Novel',
        'anime and manga',
        'All three in equal balance (Anime, Manga, LN)',
        'All of the above besides visual novels',
        "Can't separate manga from anime )).",
        'Manga and Light Novel equally',
    ]),
    ('None of the above', [
        'Neither i guess??',
        'Not sure what this means but I don’t really involve myself with these on the regular  ',
        'None of the above-mentioned ',
        'None of these?',
        'none of the listed above',
        'I don’t understand what this is asking, but I do not really consume any of these',
        'Not sure, but definitely not those listed',
        'Uhh... not sure but, none?',
        'Neither',
        "I don't really watch/read much. Maybe read a manga or watch an anime once a month?",
        'i dont watch anime',
        "I don't watch much anime",
        'None',
        'I don’t interact with any of these.',
        'I don’t watch tv',
        'None?',
        "I don't read VNs, light novels, manga nor do I watch anime.",
        'Nothing, actually. Used to watch Anime before, but not anymore since 3 years ago. ',
        'None. ',
        'None of the above, but I have consumed anime, manga and light novels, in that order',
        "Don't really watch this stuff enough",
    ]),
    ('Other', [
        'Sleep',
        'Music',
        'Fanfiction websites such as ao3 and fanart websites such as pixiv',
        'Hentai',
        'Browsing pixiv',
        'girlfriend',
        'Doujins',
        'Animation in general, but I have lost interest in anime.',
        'Live action ',
    ]),
    ('No preference', [
        "No preference; I view any kind of shows, regardless of whether it's a slice of life anime of two cute anime girls, or Breaking Bad",
        'Series',
        'series in general',
        "I don't specify anything. Everything is good",
        "A little of all? I don't really watch or read anime/manga that often, but if someone specifically recommends something I'll consume it in the content they recommend. ",
    ]),
    ('Did not understand/Ambiguous', [
        "I'm not a weeaboo",
        "What??",
        "??? ",
        "Huh?",
        ":(",
        "idk🤷\u200d♀️",
        "Idk what you mean by this",
        '?',
        'Huh? Not the answers I’d expect for that question given the context of the survey. ',
        "I don't understand this question, sorry",
        "I don't understand the question",
        "Don't understand the question",
        'Question unclear',
        'The question is unclear. ',
        "I don't understand the question. ",
        'this question makes no sense--primary content source for what?',
        "I don't know for now, things has been ridiculous on internet in recent years, and I'm not even talking about the politically cancerous year of 2016, I argue 2020 and further still keep worse and driven me suicidal for various reasons, especially financial and social aspects of it ",
        "I don't understand this question.",
        'I don\'t understand the question. What "Content"?',
        "I don't understand this question. Spotify?",
        '???',
        'Unknown',
        "I'm not sure what you're asking",
        "I don't understand this question",
        'What does this even mean',
        'Not a weeb',
        'not sure, N/A.',
        'Wut',
    ]),
]

# Recode group by group; answers that match no group (and missing answers)
# keep their current value.
for content_label, raw_answers in content_groups:
    df['content'] = df['content'].mask(df['content'].isin(raw_answers), content_label)

In [21]:
#cleaning continent variable
# Fold the bare 'Australia' answer into the combined 'Australia/Oceania' label;
# every other value (including missing ones) is kept as-is.
df['continent'] = df['continent'].where(df['continent'] != 'Australia', 'Australia/Oceania')

## Export clean data

In [22]:
# Write the cleaned frame to a CSV file in the working directory, using the
# default to_csv options.
export_path = 'cleaned_gacha_survey.csv'
df.to_csv(export_path)