## Extracting Tables from a Wikia Site

In [148]:
import pandas as pd

# Wiki page whose HTML tables are scraped below. The "#..." fragment is only a
# browser anchor; the tables are picked by position after parsing, not by it.
url = "https://gamedevtycoon.fandom.com/wiki/Game_Development_Based_on_Experience/1.4.3#Multi-Genre_combinations"

In [149]:
# read_html fetches the page and returns one DataFrame per HTML <table> it can
# parse, in page order (requires network access).
list_of_dfs = pd.read_html(url)

In [150]:
# Position 2 is the topic table: topic name, six genre ratings, a spacer column
# (empty in the preview rows), then the three audience ratings Y / E / M.
df = list_of_dfs[2]
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,Topic,Genres,Genres,Genres,Genres,Genres,Genres,,Audiences,Audiences,Audiences
1,Topic,Action,Adventure,RPG,Simulation,Strategy,Casual,,Y,E,M
2,Abstract,++,+++,−−−,−−−,+,−−−,,+,++,+++
3,Airplane,+++,−−−,+,+++,+++,+++,,+++,+++,++
4,Aliens,+++,+,+++,−−−,++,−−,,++,+++,+++


In [151]:
# Keep the topic name plus the six genre columns (positions 0-6). Row 0 (the
# "Genres"/"Audiences" banner) and the last two rows are dropped; row 1, which
# carries the column labels, stays in as an ordinary data row.
topics = df.iloc[1:-2,0:7]

In [152]:
def convert_to_numerical(value):
    """Translate a wiki rating cell such as ``+++`` or ``−−`` into a signed int.

    The result is the number of ``+`` characters when they outnumber the
    minus signs, or the negated number of minus signs when those outnumber
    the pluses. A cell containing exactly one ``~`` maps to 0.

    Only the typographic minus sign ``−`` is counted, not the ASCII hyphen
    ``-``, so labels such as ``"Sci-Fi"`` are not mistaken for ratings.

    Args:
        value: A single table cell. Normally a string; any other object (for
            example the NaN pandas uses for an empty cell, or an int left by
            a previous conversion pass) is returned as is instead of raising.

    Returns:
        An ``int`` for rating cells. Cells with equally many plus and minus
        characters (in practice labels, which contain neither) are returned
        unchanged.
    """
    # Non-string cells have no rating glyphs to count; pass them through so
    # the function is safe on empty cells and on already-converted data.
    if not isinstance(value, str):
        return value

    # The tilde check comes first: a single '~' means "neutral" regardless of
    # anything else in the cell.
    if value.count('~') == 1:
        return 0

    plus_count = value.count('+')
    minus_count = value.count('−')

    if plus_count > minus_count:
        return plus_count
    if plus_count < minus_count:
        return -minus_count
    # Tie (usually 0 vs 0): not a rating, keep the original text.
    return value

# Convert every cell element-wise; label cells (topic names, the header row)
# come back unchanged, rating cells become signed integers.
# NOTE(review): DataFrame.applymap is deprecated in recent pandas releases in
# favour of DataFrame.map — confirm the pandas version in use before switching.
topics = topics.applymap(convert_to_numerical)

In [153]:
topics

Unnamed: 0,0,1,2,3,4,5,6
1,Topic,Action,Adventure,RPG,Simulation,Strategy,Casual
2,Abstract,2,3,-3,-3,1,-3
3,Airplane,3,-3,1,3,3,3
4,Aliens,3,1,3,-3,2,-2
5,Alternate History,3,1,3,1,2,-3
...,...,...,...,...,...,...,...
64,Virtual Pet,-3,1,2,3,2,3
65,Vocabulary,-3,-3,-3,3,3,3
66,Werewolf,3,1,3,-3,-3,-2
67,Wild West,2,-2,3,-3,-3,-2


In [154]:
# Audience ratings are the last three columns of the topic table.
audience = df.iloc[1:-2,-3:]
# This slice runs two rows longer than `audience`; insert() aligns a Series on
# the frame's index, so the surplus rows are simply not carried over.
audience_first_col = df.iloc[1:,0]
# Put the topic names in front so each rating row is labelled.
audience.insert(0,"Topics",audience_first_col)

In [155]:
# Same element-wise conversion as for the genre ratings; topic names and the
# Y / E / M label row pass through untouched.
audience = audience.applymap(convert_to_numerical)
audience

Unnamed: 0,Topics,8,9,10
1,Topic,Y,E,M
2,Abstract,1,2,3
3,Airplane,3,3,2
4,Aliens,2,3,3
5,Alternate History,-3,3,3
...,...,...,...,...
64,Virtual Pet,3,1,-2
65,Vocabulary,2,3,-3
66,Werewolf,-2,2,3
67,Wild West,3,2,3


In [156]:
# Position 3 is the per-system table (one row per system, e.g. PC, G64, TES).
system_table = list_of_dfs[3]

In [157]:
# System name plus the six genre columns. Row 0 and the last three rows are
# dropped; the label row (row 1) is kept as data.
systems_topics = system_table.iloc[1:-3,0:7]
# Turn the +/− rating strings into signed integers; labels are left as text.
systems_topics = systems_topics.applymap(convert_to_numerical)
systems_topics

Unnamed: 0,0,1,2,3,4,5,6
1,System,Action,Adventure,RPG,Simulation,Strategy,Casual
2,PC,2,3,2,3,3,-3
3,G64,2,3,2,2,3,-2
4,TES,1,-2,1,1,-2,3
5,Master V,2,-2,1,1,-2,3
6,Gameling,1,-2,2,2,-3,3
7,Vena Gear,2,1,1,2,-3,3
8,Vena Oasis,3,1,1,2,-3,-2
9,Super TES,2,2,2,3,-2,2
10,Playsystem,3,1,3,2,-2,-3


In [158]:
# Audience ratings are the last three columns of the system table.
systems_audience = system_table.iloc[1:-3,-3:]
# Sliced three rows longer than `systems_audience`; insert() aligns on the
# index, so only the matching rows are used.
systems_audience_first_col = system_table.iloc[1:,0]
systems_audience.insert(0,"System",systems_audience_first_col)
# Turn the +/− rating strings into signed integers; labels are left as text.
systems_audience = systems_audience.applymap(convert_to_numerical)
systems_audience

Unnamed: 0,System,8,9,10
1,System,Y,E,M
2,PC,1,2,3
3,G64,1,2,3
4,TES,3,2,-3
5,Master V,2,3,-2
6,Gameling,3,2,-3
7,Vena Gear,2,3,1
8,Vena Oasis,1,3,2
9,Super TES,3,2,-2
10,Playsystem,1,3,2


In [159]:
# NOTE(review): export is disabled, yet a later cell reads 'topics.csv' back —
# the file must already exist on disk; confirm before running from scratch.
#topics.to_csv("topics.csv")

In [160]:
# NOTE(review): exports are disabled here. 'audience.csv' is read back by a
# later cell, so it must already exist on disk; the two systems_* files are
# written by an active cell further down.
#audience.to_csv("audience.csv")
#systems_topics.to_csv("systems_topics.csv")
#systems_audience.to_csv("systems_audience.csv")

In [161]:
# Hand-entered release dates, one string per row. The first entry is a column
# label rather than a date, like the label row kept in the scraped tables.
# NOTE(review): presumably in-game dates written as month/week/year — confirm.
# The list is not referenced by any other cell visible in this notebook.
release_dates = [
    'Release_Date',
    '1/1/0001',
    '1/1/0001',
    '3/2/0002',
    '6/4/0003',
    '2/4/0004',
    '9/2/0004',
    '11/1/0005',
    '10/4/0006',
    '8/1/0008',
    '6/2/0010',
    '9/4/0011',
    '2/1/0013',
    '10/4/0013',
    '11/4/0014',
    '10/1/0015',
    '6/2/0016',
    '4/1/0019',
    '1/3/0020',
    '4/4/0020',
    '10/4/0020',
    '8/2/0021',
    '9/1/0022',
    '2/3/0024',
    '6/2/0026',
    '6/1/0027',
    '8/2/0027',
    '2/1/0032',
    '4/3/0032'
]

In [162]:
systems_topics

Unnamed: 0,0,1,2,3,4,5,6
1,System,Action,Adventure,RPG,Simulation,Strategy,Casual
2,PC,2,3,2,3,3,-3
3,G64,2,3,2,2,3,-2
4,TES,1,-2,1,1,-2,3
5,Master V,2,-2,1,1,-2,3
6,Gameling,1,-2,2,2,-3,3
7,Vena Gear,2,1,1,2,-3,3
8,Vena Oasis,3,1,1,2,-3,-2
9,Super TES,2,2,2,3,-2,2
10,Playsystem,3,1,3,2,-2,-3


In [167]:
# Persist both system tables. to_csv writes the index as the first column by
# default, so the files gain an extra leading column when read back.
systems_topics.to_csv("systems_topics.csv")
systems_audience.to_csv("systems_audience.csv")

In [164]:
systems_audience

Unnamed: 0,System,8,9,10
1,System,Y,E,M
2,PC,1,2,3
3,G64,1,2,3
4,TES,3,2,-3
5,Master V,2,3,-2
6,Gameling,3,2,-3
7,Vena Gear,2,3,1
8,Vena Oasis,1,3,2
9,Super TES,3,2,-2
10,Playsystem,1,3,2


In [165]:
topics

Unnamed: 0,0,1,2,3,4,5,6
1,Topic,Action,Adventure,RPG,Simulation,Strategy,Casual
2,Abstract,2,3,-3,-3,1,-3
3,Airplane,3,-3,1,3,3,3
4,Aliens,3,1,3,-3,2,-2
5,Alternate History,3,1,3,1,2,-3
...,...,...,...,...,...,...,...
64,Virtual Pet,-3,1,2,3,2,3
65,Vocabulary,-3,-3,-3,3,3,3
66,Werewolf,3,1,3,-3,-3,-2
67,Wild West,2,-2,3,-3,-3,-2


In [166]:
systems_topics

Unnamed: 0,0,1,2,3,4,5,6
1,System,Action,Adventure,RPG,Simulation,Strategy,Casual
2,PC,2,3,2,3,3,-3
3,G64,2,3,2,2,3,-2
4,TES,1,-2,1,1,-2,3
5,Master V,2,-2,1,1,-2,3
6,Gameling,1,-2,2,2,-3,3
7,Vena Gear,2,1,1,2,-3,3
8,Vena Oasis,3,1,1,2,-3,-2
9,Super TES,2,2,2,3,-2,2
10,Playsystem,3,1,3,2,-2,-3


In [243]:
# Reload the previously exported tables from disk.
topics = pd.read_csv('topics.csv')
audience = pd.read_csv('audience.csv')
systems_topics = pd.read_csv('systems_topics.csv')
systems_audience = pd.read_csv('systems_audience.csv')

# Drop the first CSV column so the topic name sits in column 0, followed by
# one column per genre.
topics = topics.iloc[:, 1:]
topics_temp = topics.copy()

# One "<genre> - <topic>" label for every cell holding the string '3',
# collected row by row and returned in alphabetical order.
# NOTE(review): the comparison is against the string '3'; a column that
# read_csv parsed as integers would produce no matches — confirm the dtypes.
genre_labels = list(topics_temp.columns)
topics_combinations = sorted(
    f"{genre_labels[col]} - {record[0]}"
    for record in topics_temp.itertuples(index=False, name=None)
    for col in range(1, len(genre_labels))
    if record[col] == '3'
)
print(topics_combinations)

['Action - Airplane', 'Action - Aliens', 'Action - Alternate History', 'Action - Assassin', 'Action - Crime', 'Action - Cyberpunk', 'Action - Dungeon', 'Action - Extreme Sports', 'Action - Fantasy', 'Action - Horror', 'Action - Hunting', 'Action - Martial Arts', 'Action - Medieval', 'Action - Military', 'Action - Music*', 'Action - Mythology', 'Action - Ninja', 'Action - Post Apocalyptic', 'Action - Prison', 'Action - Rhythm*', 'Action - Sci-Fi', 'Action - Space', 'Action - Sports*', 'Action - Spy', 'Action - Superheroes', 'Action - UFO', 'Action - Vampire', 'Action - Werewolf', 'Action - Zombies', 'Adventure - Abstract', 'Adventure - Comedy', 'Adventure - Detective*', 'Adventure - Fantasy', 'Adventure - Horror', 'Adventure - Law*', 'Adventure - Life', 'Adventure - Mad Science', 'Adventure - Medieval', 'Adventure - Mystery*', 'Adventure - Pirate', 'Adventure - Prison', 'Adventure - Romance', 'Adventure - School', 'Adventure - Sci-Fi', 'Adventure - Spy', 'Adventure - Time Travel', 'Casu

In [237]:
# Drop the leading column (presumably the index saved into the CSV) so the
# topic name becomes column 0, followed by the Y / E / M ratings.
audience = audience.iloc[:,1:]
audience

Unnamed: 0,Topic,Y,E,M
0,Abstract,1,2,3
1,Airplane,3,3,2
2,Aliens,2,3,3
3,Alternate History,-3,3,3
4,Assassin,-3,1,3
...,...,...,...,...
62,Virtual Pet,3,1,-2
63,Vocabulary,2,3,-3
64,Werewolf,-2,2,3
65,Wild West,3,2,3


In [241]:
# Collect an "<audience> - <topic>" label for every rating cell holding '3'.
# Once the leading CSV column has been dropped (the `audience.iloc[:,1:]`
# step), column 0 holds the topic name and the remaining columns hold the
# ratings. The label therefore has to read column 0: reading column 1 would
# put the first rating into the label instead of the topic, unlike the
# equivalent topics loop, which reads column 0.
# NOTE(review): the comparison is against the string '3'; a column that
# read_csv parsed as integers would produce no matches — confirm the dtypes.
audience_combinations = []
for row in audience.itertuples(index=False, name=None):
    topic_name = row[0]
    for column_name, rating in zip(audience.columns[1:], row[1:]):
        if rating == '3':
            audience_combinations.append(f"{column_name} - {topic_name}")

In [242]:
# Sort in place; since every label starts with the audience code, this groups
# the output by audience and orders topics alphabetically within each group.
audience_combinations.sort()
print(audience_combinations)

['E - Airplane', 'E - Aliens', 'E - Alternate History', 'E - Business', 'E - City', 'E - Colonization', 'E - Construction', 'E - Cooking', 'E - Detective*', 'E - Dungeon', 'E - Evolution', 'E - Expedition', 'E - Fantasy', 'E - Farming', 'E - Game Dev', 'E - Government', 'E - History', 'E - Hospital', 'E - Hunting', 'E - Law*', 'E - Life', 'E - Medieval', 'E - Movies', 'E - Mythology', 'E - Pirate', 'E - Racing*', 'E - Romance', 'E - Sci-Fi', 'E - Space', 'E - Sports*', 'E - Superheroes', 'E - Surgery', 'E - Technology', 'E - Thief', 'E - Time Travel', 'E - Transport', 'E - UFO', 'E - Vampire', 'E - Vocabulary', 'M - Abstract', 'M - Aliens', 'M - Alternate History', 'M - Assassin', 'M - Comedy', 'M - Crime', 'M - Cyberpunk', 'M - Disasters', 'M - Dungeon', 'M - Dystopian', 'M - Extreme Sports', 'M - Fantasy', 'M - Hacking', 'M - Horror', 'M - Mad Science', 'M - Martial Arts', 'M - Military', 'M - Mystery*', 'M - Mythology', 'M - Post Apocalyptic', 'M - Prison', 'M - Romance', 'M - Sci-F

In [251]:
# Remove every space so each label becomes one compact token,
# e.g. 'Action - Alternate History' -> 'Action-AlternateHistory'.
topics_combinations = [label.replace(' ', '') for label in topics_combinations]
audience_combinations = [label.replace(' ', '') for label in audience_combinations]
print(topics_combinations)
print(audience_combinations)

['Action-Airplane', 'Action-Aliens', 'Action-AlternateHistory', 'Action-Assassin', 'Action-Crime', 'Action-Cyberpunk', 'Action-Dungeon', 'Action-ExtremeSports', 'Action-Fantasy', 'Action-Horror', 'Action-Hunting', 'Action-MartialArts', 'Action-Medieval', 'Action-Military', 'Action-Music*', 'Action-Mythology', 'Action-Ninja', 'Action-PostApocalyptic', 'Action-Prison', 'Action-Rhythm*', 'Action-Sci-Fi', 'Action-Space', 'Action-Sports*', 'Action-Spy', 'Action-Superheroes', 'Action-UFO', 'Action-Vampire', 'Action-Werewolf', 'Action-Zombies', 'Adventure-Abstract', 'Adventure-Comedy', 'Adventure-Detective*', 'Adventure-Fantasy', 'Adventure-Horror', 'Adventure-Law*', 'Adventure-Life', 'Adventure-MadScience', 'Adventure-Medieval', 'Adventure-Mystery*', 'Adventure-Pirate', 'Adventure-Prison', 'Adventure-Romance', 'Adventure-School', 'Adventure-Sci-Fi', 'Adventure-Spy', 'Adventure-TimeTravel', 'Casual-Airplane', 'Casual-Comedy', 'Casual-Cooking', 'Casual-Dance', 'Casual-Farming', 'Casual-Fashion