## Analisis de Redes Sociales 
# Proyecto Final: **Red Tags de Steam**
#### Miguel Ferreras Chumillas - Pablo Daurell Marina (Grupo 3)
---------------

## Cargar csv

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx

In [2]:
df = pd.read_csv("STEAM-Mayo2019/steam.csv")

df.shape

(27075, 18)

In [3]:
data = df[['name', 'steamspy_tags', 'owners']]
data.shape

(27075, 3)

In [4]:
data.isnull().values.any()

False

In [5]:
data

Unnamed: 0,name,steamspy_tags,owners
0,Counter-Strike,Action;FPS;Multiplayer,10000000-20000000
1,Team Fortress Classic,Action;FPS;Multiplayer,5000000-10000000
2,Day of Defeat,FPS;World War II;Multiplayer,5000000-10000000
3,Deathmatch Classic,Action;FPS;Multiplayer,5000000-10000000
4,Half-Life: Opposing Force,FPS;Action;Sci-fi,5000000-10000000
...,...,...,...
27070,Room of Pandora,Adventure;Indie;Casual,0-20000
27071,Cyber Gun,Action;Indie;Adventure,0-20000
27072,Super Star Blast,Action;Indie;Casual,0-20000
27073,New Yankee 7: Deer Hunters,Indie;Casual;Adventure,0-20000


In [6]:
data = data.rename(columns={'name': 'name', 'steamspy_tags' : 'tags'})
data

Unnamed: 0,name,tags,owners
0,Counter-Strike,Action;FPS;Multiplayer,10000000-20000000
1,Team Fortress Classic,Action;FPS;Multiplayer,5000000-10000000
2,Day of Defeat,FPS;World War II;Multiplayer,5000000-10000000
3,Deathmatch Classic,Action;FPS;Multiplayer,5000000-10000000
4,Half-Life: Opposing Force,FPS;Action;Sci-fi,5000000-10000000
...,...,...,...
27070,Room of Pandora,Adventure;Indie;Casual,0-20000
27071,Cyber Gun,Action;Indie;Adventure,0-20000
27072,Super Star Blast,Action;Indie;Casual,0-20000
27073,New Yankee 7: Deer Hunters,Indie;Casual;Adventure,0-20000


## Eliminar duplicados

In [7]:
data.iloc[np.where(data['name'].duplicated() == True)]

Unnamed: 0,name,tags,owners
4673,Dark Matter,Action;Casual;Indie,100000-200000
7912,Ultimate Arena,Action;Indie;FPS,0-20000
8227,New York Bus Simulator,Simulation,0-20000
9471,Taxi,Casual,0-20000
10337,Mars 2030,Simulation;Mars;Space,0-20000
11450,Rumpus,Action;Indie;Casual,0-20000
14286,Invasion,Action;Indie;Arcade,0-20000
15544,Alter Ego,RPG;Casual;Simulation,0-20000
15617,Castles,Strategy;Adventure,0-20000
15622,Luna,Action;Indie;Puzzle-Platformer,0-20000


In [8]:
data[data.name == 'Luna']

Unnamed: 0,name,tags,owners
13773,Luna,Indie;VR;Puzzle,0-20000
15622,Luna,Action;Indie;Puzzle-Platformer,0-20000


In [9]:
data = data.drop(data[data['name'].duplicated() == True].index)
data.shape

(27033, 3)

In [10]:
data.iloc[np.where(data['name'].duplicated() == True)]

Unnamed: 0,name,tags,owners


## Filtrar por numero de ventas (para red de juegos)
## /* ---------------------------------------

In [193]:
data.iloc[np.where(data['owners'] == '0-20000')]

Unnamed: 0,name,tags,owners
122,Poker Superstars II,Card Game,0-20000
124,RACE: Caterham Expansion,Racing;Simulation,0-20000
145,Zen of Sudoku,Casual;Indie;Puzzle,0-20000
188,Azada,Casual;Indie;Puzzle,0-20000
248,Haunted House™,Adventure,0-20000
...,...,...,...
27070,Room of Pandora,Adventure;Indie;Casual,0-20000
27071,Cyber Gun,Action;Indie;Adventure,0-20000
27072,Super Star Blast,Action;Indie;Casual,0-20000
27073,New Yankee 7: Deer Hunters,Indie;Casual;Adventure,0-20000


In [197]:
# data = data.drop(data[data['owners'] == '0-20000'].index)
d1 = data[data.owners == '10000000-20000000']
d2 = data[data.owners == '5000000-10000000']
d3 = data[data.owners == '2000000-5000000']
print(d1.shape, d2.shape, d3.shape)
d = pd.concat([d1, d2, d3])
d.shape

(21, 3) (46, 3) (193, 3)


(260, 3)

In [204]:
data.iloc[np.where(data['owners'] == '0-20000')]

Unnamed: 0,name,tags,owners


## ---------------------------------------------- */

In [11]:
data.head()

Unnamed: 0,name,tags,owners
0,Counter-Strike,Action;FPS;Multiplayer,10000000-20000000
1,Team Fortress Classic,Action;FPS;Multiplayer,5000000-10000000
2,Day of Defeat,FPS;World War II;Multiplayer,5000000-10000000
3,Deathmatch Classic,Action;FPS;Multiplayer,5000000-10000000
4,Half-Life: Opposing Force,FPS;Action;Sci-fi,5000000-10000000


### Extraer lista de tags

In [12]:
import collections

tags = {}
for i in data.tags:
    for t in i.split(';'):
        if(tags.get(t)):
            c = tags.get(t) + 1
            tags.update({t: c})
        else:
            tags.update({t: 1})

# Ordenado por numero
tags = sorted(tags, key=tags.get, reverse=True)
print("Tags :", len(tags))
print(tags)

Tags : 339
['Indie', 'Action', 'Casual', 'Adventure', 'Strategy', 'Simulation', 'Early Access', 'RPG', 'Free to Play', 'Puzzle', 'VR', 'Sports', 'Racing', 'Platformer', 'Nudity', 'Sexual Content', 'Visual Novel', 'Violent', 'Anime', 'Horror', 'Point & Click', 'Gore', 'Hidden Object', 'Multiplayer', 'FPS', 'Massively Multiplayer', 'Pixel Graphics', "Shoot 'Em Up", 'Open World', 'Survival', 'Space', 'Arcade', 'Female Protagonist', 'RPGMaker', 'RTS', 'Turn-Based', 'Classic', 'Tower Defense', 'Card Game', '2D', 'Singleplayer', 'Story Rich', 'Zombies', 'Sci-fi', 'Utilities', 'World War II', 'Great Soundtrack', 'Co-op', 'Match 3', 'Fighting', 'Management', 'Fantasy', 'Retro', 'Music', 'Board Game', 'Rogue-like', 'Turn-Based Strategy', 'Shooter', 'Bullet Hell', 'Sandbox', 'JRPG', 'Stealth', 'First-Person', 'Comedy', 'Family Friendly', 'Atmospheric', 'Difficult', 'Education', 'Hack and Slash', 'Local Multiplayer', 'Design & Illustration', 'Memes', 'Psychological Horror', 'City Builder', 'Histo

### Extraer lista de juegos

In [14]:
games = []
for g in data.name:
    games.append(g)

print("Juegos: ", len(games))

Juegos:  27033


### Diccionario de tags y juegos ```{tag: [juegos...]}```

In [22]:
### Diccionario de todos los tag con sus correspondientes juegos
data_list = data.values.tolist()
tag_dict = {}
for t in tags:
    l = []
    for d in data_list:
        if d[1].split(';').count(t) > 0:
## Descomentar para añanir lista de subtags a cada juego /*
#             s = []
#             for i in d[1].split(';'):
#                 if(i != t):
#                     s.append(i)
#             l.append((d[0], s))
## */
            l.append(d[0])
    tag_dict.update({t: l})

In [23]:
tag_dict.get('Dark')

['INSIDE',
 'The Guilt and the Shadow',
 'Albert and Otto',
 'Mortificatio',
 'Betweenside']

In [24]:
for t in tags:
    print(t, '->', len(tag_dict.get(t)))

Indie -> 16206
Action -> 10303
Casual -> 8188
Adventure -> 7758
Strategy -> 4170
Simulation -> 3273
Early Access -> 2962
RPG -> 2782
Free to Play -> 1660
Puzzle -> 1116
VR -> 960
Sports -> 781
Racing -> 764
Platformer -> 648
Nudity -> 569
Sexual Content -> 556
Visual Novel -> 546
Violent -> 545
Anime -> 538
Horror -> 492
Point & Click -> 492
Gore -> 476
Hidden Object -> 460
Multiplayer -> 405
FPS -> 404
Massively Multiplayer -> 376
Pixel Graphics -> 249
Shoot 'Em Up -> 245
Open World -> 243
Survival -> 235
Space -> 224
Arcade -> 202
Female Protagonist -> 199
RPGMaker -> 191
RTS -> 174
Turn-Based -> 171
Classic -> 170
Tower Defense -> 168
Card Game -> 164
2D -> 163
Singleplayer -> 161
Story Rich -> 159
Zombies -> 158
Sci-fi -> 157
Utilities -> 138
World War II -> 130
Great Soundtrack -> 125
Co-op -> 120
Match 3 -> 119
Fighting -> 117
Management -> 116
Fantasy -> 115
Retro -> 114
Music -> 105
Board Game -> 105
Rogue-like -> 104
Turn-Based Strategy -> 103
Shooter -> 101
Bullet Hell -> 96


## Construir redes

In [26]:
### Recibe dos listas y devuelve el numero de elementos en comun
def tags_in_common(a, b):
    n = 0
    for i in a:
        for j in b:
            if i == j:
                n = n+1
    return n

### - Red de tags

In [None]:
G_tags = nx.Graph()
G_tags.add_nodes_from(tags)

tag_list = list(tag_dict.items())

i = 0
while i < len(tag_dict):
    print(i, end='\r')
    
    t1 = tag_list[i][1] # Lista de tags juego 1
    
    j = i + 1
    while j < len(tag_dict):
        t2 = tag_list[j][1] # Lista de tags juego 2
        
        n = tags_in_common(t1, t2)
        if n > 0:
            G_tags.add_edge(tag_list[i][0], tag_list[j][0], weight=n)
        
        j += 1
    i += 1

In [None]:
nx.number_of_nodes(G_tags)

In [None]:
nx.number_of_edges(G_tags)

In [69]:
nx.write_gexf(G_tags, './Tags.gexf')

### - Red de juegos

In [47]:
# import csv

# with open('adj_list.csv', mode='w', encoding="utf-8") as file:
#     writer = csv.writer(file)
#     writer.writerow(["source", "target"])
    
#     i = 0
#     while i < len(data_list):
#         print(i, end='\r')

#         t1 = data_list[i][1].split(';') # Lista de tags juego 1

#         j = i + 1 
#         while j < len(data_list): # Recorrer resto de juegos
#             t2 = data_list[j][1].split(';') # Lista de tags juego 2

#             n = tags_in_common(t1, t2)
#             for m in range(n): # Añadir num de aristas correspondientes
#                 writer.writerow([data_list[i][0], data_list[j][0]])

#             j += 1

#         i += 1

27032

In [None]:
G = nx.Graph()
G.add_nodes_from(games)

data_list = data.values.tolist()

i = 0
while i < len(data_list):
    print(i, end='\r')

    t1 = data_list[i][1].split(';') # Lista de tags juego 1
    
    j = i + 1 
    while j < len(data_list): # Recorrer resto de juegos
        t2 = data_list[j][1].split(';') # Lista de tags juego 2
        
        n = tags_in_common(t1, t2)
        if n > 0:
            G.add_edge(data_list[i][0], data_list[j][0], weight=n)
            

        j += 1
    
    i += 1

In [None]:
nx.number_of_nodes(G)

In [None]:
nx.number_of_edges(G)

In [239]:
nx.write_gexf(G, 'Games.gexf')

### - Red de juegos por tag

In [262]:
G = nx.Graph()
tag = '2D'

for g in tag_dict.get(tag):
    G.add_node(g[0])

l = tag_dict.get(tag)
i = 0
for i in range(len(l)):
    print(i, end='\r')
    
    t1 = l[i][1] # Lista de tags juego 1
    
    j = i + 1
    while j < len(l):    
        t2 = l[j][1] # Lista de tags juego 2
        n = tags_in_common(t1, t2)
        if n > 0:
            G.add_edge(l[i][0], l[j][0], weight=n)
        j += 1

print('Nodos: ', nx.number_of_nodes(G))
print('Aristas: ', nx.number_of_edges(G))
      
nx.write_gexf(G, '{}.gexf'.format(tag))
print('{}.gexf generado'.format(tag))

Nodos:  190
Aristas:  7387
2D.gexf generado
