# Creating Table 3
## Loading Libraries and Reading Data

Generating file T3.csv which contains Fire Team IDs for each Character ID.

In [1]:
import json
import pandas as pd

In [2]:
# Parse the first PGCR dump. The context manager closes the file handle as
# soon as the JSON has been read, and `data` only ever names the parsed payload.
with open("../data/PGCR_graph_dataset0.json") as json_file:
    data = json.load(json_file)

In [3]:
# Keep an independent (shallow) copy of the outer list so that removing
# game sets from `data` in place does not also remove them from `data_copy`.
data_copy = list(data)

## Exploring the structure of data

In [5]:
# Exploring the structure of data
# Layout probed below: data -> game set -> game -> "Response" -> "entries" -> one entry per player.
# Only the last expression of the cell is echoed as output.
len(data) # 1003, is a list
len(data[0]) # 25, is a list
data[0][0]["Response"]["entries"][0]["values"]["fireteamId"]["basic"]["value"] # fire team ID
data[0][0]["Response"]["entries"][0]["characterId"] # character ID

'2305843009268720704'

In [6]:
# Disabled diagnostic kept for reference: it walks every game set, skipping
# index 386, and prints the membershipId of every player entry.
# # game_set 386 has some issues
# for i, game_set in enumerate(data):
#     print(i)
#     if i == 386 : continue
#     for game_info in game_set:
#         for player_info in game_info["Response"]["entries"]:
#             print(player_info["player"]["destinyUserInfo"]["membershipId"])

In [7]:
# Set aside the game set at index 386 (one of its games is a failed API
# response) and keep every other game set in `data`.
# Both names are derived from `data_copy` without mutating any list, so
# `data_copy` still holds the full dataset and re-running this cell always
# gives the same result.
elem = data_copy[386]
data = data_copy[:386] + data_copy[387:]

## Extracting `playerIDs` and `fireTeamIDs`

In [7]:
# Flatten game sets -> games -> player entries and collect each player's
# membershipId, in traversal order.
playerIDs = [
    entry["player"]["destinyUserInfo"]["membershipId"]
    for batch in data
    for game in batch
    for entry in game["Response"]["entries"]
]
len(playerIDs)

KeyError: 'Response'

In [9]:
# Same traversal as for the player IDs, collecting the fire team ID of every
# player entry so both lists line up index by index.
fireTeamIDs = [
    entry["values"]["fireteamId"]["basic"]["value"]
    for batch in data
    for game in batch
    for entry in game["Response"]["entries"]
]
len(fireTeamIDs)

121645

## Creating the Data Frame

In [10]:
# Pair the two parallel lists column-wise: one row per player entry.
T3 = pd.DataFrame(dict(fireTeamID=fireTeamIDs, playerID=playerIDs))
T3.head()

Unnamed: 0,fireTeamID,playerID
0,-3.685548e+17,4611686018445743058
1,-2.245961e+18,4611686018445765559
2,-5.62082e+18,4611686018436434230
3,1.855335e+17,4611686018446767615
4,3.666009e+18,4611686018455684505


## What is wrong with elem 386?

Using the for loop to investigate the problem, we found that `elem[4]` had some issues.

In [11]:
# Walk the removed game set and print every fire team ID, game by game, to
# locate the item that breaks the extraction.
len(elem)  # number of games in the set (not echoed: it is not the cell's last expression)

for i, game_info in enumerate(elem):
    print(i)
    try:
        entries = game_info["Response"]["entries"]
    except KeyError as err:
        # Report the offending game and stop here instead of raising, so the
        # notebook still completes under Restart & Run All.
        print("KeyError:", err)
        break
    for player_info in entries:
        print(player_info["values"]["fireteamId"]["basic"]["value"])

0
1.600697195277455e+18
-6.348326376293105e+18
-6.494090189096198e+18
1.1028136067465902e+17
6.345618395897744e+18
7.292249995381676e+18
6.345618395897744e+18
6.345618395897744e+18
1
-6.784968767515582e+18
-6.494090189096198e+18
-6.54366773945844e+18
8.045228988200403e+18
-5.741375587544479e+18
2.5781988903164544e+18
-7.396708596824929e+18
4.173493896829916e+18
2
7.475683580473355e+18
-1.7666294348954877e+17
2.4083941538884538e+17
-6.899599156010211e+18
-7.170965067668722e+18
-6.494090189096198e+18
-2.70978892716923e+18
2.4083941538884538e+17
3
7.35842355099692e+18
4.819254210965579e+18
6.919517908570447e+18
1.8524873921183473e+18
-1.6663537611408812e+18
-6.494090189096198e+18
2.0248044766801326e+18
-2.075682527226918e+18
-4.410787300159699e+18
4


KeyError: 'Response'

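`elem[4]` has an `ErrorCode` other than 1: the request for that game failed, so the item carries an error message instead of a `Response` key.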

In [12]:
# Display the item at index 4 of the removed game set, the one without a "Response" key.
elem[4]

{'ErrorCode': 1618,
 'ErrorStatus': 'DestinyUnexpectedError',
 'Message': "An unexpected error has occurred on Bungie's servers while trying to grab Destiny information.",
 'MessageData': {},
 'ThrottleSeconds': 0}

Clearly, we also need to consider the value of the `ErrorCode` before extracting the information to avoid such errors.

## Modifying the extraction code

In [8]:
# Point `data` back at the list kept in `data_copy`, so the extraction below runs on it.
# NOTE(review): this only brings back the full dataset if `data_copy` is an
# independent copy, or if `data` was never mutated in place — verify before
# relying on the counts below.
data = data_copy

In [9]:
# Collect the characterId of every player entry, skipping games whose
# ErrorCode is not 1 (those have no "Response" payload to read).
playerIDs = [
    entry["characterId"]
    for batch in data
    for game in batch
    if game["ErrorCode"] == 1
    for entry in game["Response"]["entries"]
]
len(playerIDs)

121744

In [10]:
# Collect the fire team ID of every player entry, with the same ErrorCode
# filter as the player ID extraction so both lists stay aligned.
fireTeamIDs = [
    entry["values"]["fireteamId"]["basic"]["value"]
    for batch in data
    for game in batch
    if game["ErrorCode"] == 1
    for entry in game["Response"]["entries"]
]
len(fireTeamIDs)

121744

In [11]:
# One row per player entry: the fire team ID next to the character ID.
T3 = pd.DataFrame(
    {
        "fireTeamID": fireTeamIDs,
        "playerID": playerIDs,
    }
)
T3.head()

Unnamed: 0,fireTeamID,playerID
0,-3.685548e+17,2305843009268720704
1,-2.245961e+18,2305843009265606963
2,-5.62082e+18,2305843009262752454
3,1.855335e+17,2305843009261639164
4,3.666009e+18,2305843009265034294


In [12]:
# Write the single-file table to T3.csv in the working directory, without the row index.
T3.to_csv("T3.csv", index = False)

In [13]:
# Show the first raw fire team ID exactly as it came out of the parsed JSON.
fireTeamIDs[0]

-3.685548012425789e+17

# Creating table 3 from all the files

Reading all 50 json files

In [1]:
import json
import pandas as pd

In [7]:
# Paths of the dumps to process: PGCR_graph_dataset1.json through
# PGCR_graph_dataset50.json.
file_names = [
    "../data/PGCR_graph_dataset" + index + ".json"
    for index in map(str, range(1, 51))
]
# data_files = [json.load(open(file_name)) for file_name in file_names]

Reading the JSON files one at a time: each file is parsed and filtered, and its data is then deleted from memory to make room for the next file. This ensures that the script doesn't fail or time out because of memory pressure.

In [11]:
# Accumulate character IDs and fire team IDs across all dump files.
# The two lists are parallel: index k in both refers to the same player entry.
playerIDs = []
fireTeamIDs = []
for filename in file_names:
    print("Opening file:", filename)
    # The context manager closes each file handle right after parsing.
    with open(filename) as json_file:
        data = json.load(json_file)
    print("File Loaded:", filename)
    # Single pass over the file's games; both values of an entry are read
    # before either list is appended to, so the lists cannot drift apart.
    for game_set in data:
        for game_info in game_set:
            # Games with a non-1 ErrorCode have no "Response" payload.
            if game_info["ErrorCode"] != 1:
                continue
            for player_info in game_info["Response"]["entries"]:
                character_id = player_info["characterId"]
                fire_team_id = player_info["values"]["fireteamId"]["basic"]["value"]
                playerIDs.append(character_id)
                fireTeamIDs.append(fire_team_id)
    print("playerIDs len:", len(playerIDs), "fireTeamIDs len:", len(fireTeamIDs))
    # Drop the parsed file before loading the next one to bound memory use.
    del data

Opening file: ../data/PGCR_graph_dataset1.json
File Loaded: ../data/PGCR_graph_dataset1.json
playerIDs len: 86417 fireTeamIDs len: 86417
Opening file: ../data/PGCR_graph_dataset2.json
File Loaded: ../data/PGCR_graph_dataset2.json
playerIDs len: 171328 fireTeamIDs len: 171328
Opening file: ../data/PGCR_graph_dataset3.json
File Loaded: ../data/PGCR_graph_dataset3.json
playerIDs len: 254036 fireTeamIDs len: 254036
Opening file: ../data/PGCR_graph_dataset4.json
File Loaded: ../data/PGCR_graph_dataset4.json
playerIDs len: 340620 fireTeamIDs len: 340620
Opening file: ../data/PGCR_graph_dataset5.json
File Loaded: ../data/PGCR_graph_dataset5.json
playerIDs len: 424613 fireTeamIDs len: 424613
Opening file: ../data/PGCR_graph_dataset6.json
File Loaded: ../data/PGCR_graph_dataset6.json
playerIDs len: 512983 fireTeamIDs len: 512983
Opening file: ../data/PGCR_graph_dataset7.json
File Loaded: ../data/PGCR_graph_dataset7.json
playerIDs len: 599248 fireTeamIDs len: 599248
Opening file: ../data/PGCR_gr

In [12]:
# Combine the accumulated parallel lists into the full table and preview it.
T3 = pd.DataFrame(dict(fireTeamID=fireTeamIDs, playerID=playerIDs))
preview = T3.head()
print(preview)

     fireTeamID             playerID
0  4.116352e+18  2305843009278974423
1 -6.943784e+18  2305843009278974423
2 -3.508653e+17  2305843009278974423
3  2.043913e+17  2305843009269402895
4 -1.083649e+18  2305843009269402895


In [13]:
# (rows, columns) of the combined table before de-duplication.
T3.shape

(4308218, 2)

In [14]:
# Size the table would have once identical (fireTeamID, playerID) rows are collapsed.
T3.drop_duplicates().shape

(3076329, 2)

In [15]:
# Keep one row per distinct (fireTeamID, playerID) pair.
T3 = T3.drop_duplicates()

In [16]:
# Persist the de-duplicated table without the row index.
# NOTE(review): assumes the data_table/ directory already exists — confirm.
T3.to_csv("data_table/table3_complete.csv", index = False)

## Updating table 3 to keep only teams with 4 players

Only keeping the teams which have exactly 4 players in the fireteam.

In [None]:
# Reload the complete table from disk and preview its first rows.
t3 = pd.read_csv("data_table/table3_complete.csv")
t3.head()

In [None]:
# Sanity check: shape after dropping duplicate rows from the reloaded table.
t3.drop_duplicates().shape

In [None]:
# Count the rows recorded for each fire team; the result is indexed by fireTeamID.
t = t3.groupby(['fireTeamID']).agg(['count'])

In [None]:
# Distribution of team sizes: how many fire teams have each row count.
# `.iloc` selects the first column by position; the `.ix` accessor used
# previously has been removed from pandas.
t.iloc[:, 0].value_counts()

In [None]:
# IDs of the fire teams whose count is exactly 4 (taken from the group index).
# `.iloc` replaces the removed `.ix` accessor for positional column access.
fireTeams = t[t.iloc[:, 0] == 4].index.values

In [None]:
# Keep only the rows belonging to four-player fire teams and save them.
# `index = False` matches the other exports in this notebook and avoids
# writing the leftover, non-contiguous row labels as an extra unnamed column.
t3.loc[t3['fireTeamID'].isin(fireTeams)].to_csv("data_table/table3_updated.csv", index = False)