# Gathering & Wrangling

<ul>
<li><a href="#test">Testing</a></li>
<li><a href="#gather">Gather</a>
    <ul>
        <li><a href="#archive-g">Twitter Archive</a></li>
        <li><a href="#predictions-g">Image Predictions</a></li>
        <li><a href="#api-g">API Data</a></li>
    </ul>
<li><a href="#assess">Assess the Data</a>
    <ul>
        <li><a href="#archive-a">`archive` table</a></li>
        <li><a href="#predictions-a">`predictions` table</a></li>
        <li><a href="#api-a">`api_data` table</a></li>
        <li><a href="#inclusion">Data Inclusion Criteria</a></li>
        <li><a href="#findings">Findings</a></li>
    </ul>
<li><a href="#clean">Clean the Data</a></li>
<li><a href="#analysis">Analysis & Visualization</a></li>
<li><a href="#conclusions">Conclusions</a></li>
</ul>

<a id='test'></a>
## Testing

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import json
import os

In [2]:
api_key = 'hidden'

In [2]:
# Get all of the log summary info for the guild
antorus_fights = requests.get("https://www.warcraftlogs.com:443/v1/reports/guild/Last%20Pull/Cenarius/US&api_key=" + api_key)

In [8]:
antorus_fights.json()

[{'id': 'VhmB61LqvrRQPFwJ',
  'title': 'Mythic DEAD GUY WOOOOOOOO',
  'owner': 'Shadowbaine',
  'start': 1530152172272,
  'end': 1530161374270,
  'zone': 17},
 {'id': 'cR4g7wGY2z9VWbxH',
  'title': 'Mythic Argus ',
  'owner': 'Shadowbaine',
  'start': 1530064410476,
  'end': 1530075546810,
  'zone': 17},
 {'id': 'nRG1BJmD6fhC2vNq',
  'title': 'Mythic Bad Guy',
  'owner': 'Shadowbaine',
  'start': 1529632711055,
  'end': 1529644536361,
  'zone': 17},
 {'id': 'Tvrh8BgQL6y2qPza',
  'title': 'Mythic Argus',
  'owner': 'Shadowbaine',
  'start': 1529545941388,
  'end': 1529556954200,
  'zone': 17},
 {'id': 'Bqr8ZpYX6AV2cynW',
  'title': 'Mythic Argus',
  'owner': 'Shadowbaine',
  'start': 1529460514043,
  'end': 1529470825480,
  'zone': 17},
 {'id': 'qRpvXcKG6NAaDCJ4',
  'title': 'Mythic Argus',
  'owner': 'Shadowbaine',
  'start': 1529030268969,
  'end': 1529038771495,
  'zone': 17},
 {'id': '8QYh3aAXpgPLjydJ',
  'title': 'Mythic Argush',
  'owner': 'Shadowbaine',
  'start': 1528942270020,


In [9]:
# Get all fight IDs for a particular log
log = requests.get("https://www.warcraftlogs.com:443/v1/report/fights/VhmB61LqvrRQPFwJ&api_key=" + api_key)

In [12]:
logs = log.json()
logs

{'fights': [{'id': 1,
   'start_time': 0,
   'end_time': 595127,
   'boss': 2092,
   'size': 20,
   'difficulty': 5,
   'kill': False,
   'partial': 3,
   'bossPercentage': 1918,
   'fightPercentage': 1572,
   'lastPhaseForPercentageDisplay': 4,
   'name': 'Argus the Unmaker'},
  {'id': 2,
   'start_time': 721714,
   'end_time': 1260627,
   'boss': 2092,
   'size': 20,
   'difficulty': 5,
   'kill': False,
   'partial': 3,
   'bossPercentage': 2993,
   'fightPercentage': 2453,
   'lastPhaseForPercentageDisplay': 4,
   'name': 'Argus the Unmaker'},
  {'id': 3,
   'start_time': 1365349,
   'end_time': 1726732,
   'boss': 2092,
   'size': 20,
   'difficulty': 5,
   'kill': False,
   'partial': 3,
   'bossPercentage': 5094,
   'fightPercentage': 4175,
   'lastPhaseForPercentageDisplay': 4,
   'name': 'Argus the Unmaker'},
  {'id': 4,
   'start_time': 1812855,
   'end_time': 2368530,
   'boss': 2092,
   'size': 20,
   'difficulty': 5,
   'kill': False,
   'partial': 3,
   'bossPercentage': 

In [14]:
logs.keys()

dict_keys(['fights', 'lang', 'friendlies', 'enemies', 'friendlyPets', 'enemyPets', 'phases', 'title', 'owner', 'start', 'end', 'zone'])

In [13]:
logs['fights']

[{'id': 1,
  'start_time': 0,
  'end_time': 595127,
  'boss': 2092,
  'size': 20,
  'difficulty': 5,
  'kill': False,
  'partial': 3,
  'bossPercentage': 1918,
  'fightPercentage': 1572,
  'lastPhaseForPercentageDisplay': 4,
  'name': 'Argus the Unmaker'},
 {'id': 2,
  'start_time': 721714,
  'end_time': 1260627,
  'boss': 2092,
  'size': 20,
  'difficulty': 5,
  'kill': False,
  'partial': 3,
  'bossPercentage': 2993,
  'fightPercentage': 2453,
  'lastPhaseForPercentageDisplay': 4,
  'name': 'Argus the Unmaker'},
 {'id': 3,
  'start_time': 1365349,
  'end_time': 1726732,
  'boss': 2092,
  'size': 20,
  'difficulty': 5,
  'kill': False,
  'partial': 3,
  'bossPercentage': 5094,
  'fightPercentage': 4175,
  'lastPhaseForPercentageDisplay': 4,
  'name': 'Argus the Unmaker'},
 {'id': 4,
  'start_time': 1812855,
  'end_time': 2368530,
  'boss': 2092,
  'size': 20,
  'difficulty': 5,
  'kill': False,
  'partial': 3,
  'bossPercentage': 3099,
  'fightPercentage': 2540,
  'lastPhaseForPercent

In [15]:
logs['friendlies']

[{'name': 'Metonymy',
  'id': 11,
  'guid': 107865817,
  'type': 'Mage',
  'fights': [{'id': 1},
   {'id': 2},
   {'id': 3},
   {'id': 4},
   {'id': 5},
   {'id': 6},
   {'id': 7},
   {'id': 8},
   {'id': 9},
   {'id': 10},
   {'id': 11},
   {'id': 12},
   {'id': 13},
   {'id': 14}]},
 {'name': 'Hati',
  'id': 25,
  'guid': 106551,
  'type': 'Pet',
  'fights': [{'id': 1, 'instances': 1}, {'id': 14, 'instances': 1}]},
 {'name': 'Ayriea',
  'id': 10,
  'guid': 132573980,
  'type': 'Hunter',
  'fights': [{'id': 1},
   {'id': 2},
   {'id': 3},
   {'id': 4},
   {'id': 5},
   {'id': 6},
   {'id': 7},
   {'id': 8},
   {'id': 9},
   {'id': 10},
   {'id': 11},
   {'id': 12},
   {'id': 13},
   {'id': 14}]},
 {'name': 'Vaelyra',
  'id': 20,
  'guid': 130076470,
  'type': 'DemonHunter',
  'fights': [{'id': 1},
   {'id': 2},
   {'id': 3},
   {'id': 4},
   {'id': 5},
   {'id': 6},
   {'id': 7},
   {'id': 8},
   {'id': 9},
   {'id': 10},
   {'id': 11},
   {'id': 12},
   {'id': 13},
   {'id': 14}]},
 

In [16]:
logs['phases']

[{'boss': 2092,
  'phases': ['Stage One: Storm and Sky',
   'Stage Two: The Protector Redeemed',
   'Stage Three: The Arcane Masters',
   'Stage Four: The Gift of Life, The Forge of Loss']}]

In [47]:
# Get info about who was in the fight and summary details
fight = requests.get("https://www.warcraftlogs.com:443/v1/report/events/VhmB61LqvrRQPFwJ&api_key=" + api_key)

In [48]:
fight_info = fight.json()
fight_info

{'events': [{'timestamp': 0,
   'type': 'encounterstart',
   'name': 'Argus the Unmaker',
   'difficulty': 5,
   'size': 20,
   'encounterID': 2092},
  {'timestamp': 0,
   'type': 'combatantinfo',
   'sourceID': 1,
   'specID': 104,
   'strength': 4400,
   'agility': 52870,
   'stamina': 199117,
   'intellect': 7328,
   'dodge': 5559,
   'parry': 0,
   'block': 0,
   'armor': 3017,
   'critMelee': 5559,
   'critRanged': 5559,
   'critSpell': 5559,
   'speed': 1437,
   'leech': 0,
   'hasteMelee': 8624,
   'hasteRanged': 8624,
   'hasteSpell': 8624,
   'avoidance': 2852,
   'mastery': 13172,
   'versatilityDamageDone': 11886,
   'versatilityHealingDone': 11886,
   'versatilityDamageReduction': 11886,
   'talents': [{'id': 155835, 'icon': 'spell_druid_bristlingfur.jpg'},
    {'id': 204012, 'icon': 'ability_druid_enrage.jpg'},
    {'id': 197488, 'icon': 'ability_druid_improvedmoonkinform.jpg'},
    {'id': 5211, 'icon': 'ability_druid_bash.jpg'},
    {'id': 203964, 'icon': 'spell_frost_ice

In [21]:
fight_info.keys()

dict_keys(['events'])

In [25]:
fight_info['events'][0]

{'timestamp': 0,
 'type': 'encounterstart',
 'name': 'Argus the Unmaker',
 'difficulty': 5,
 'size': 20,
 'encounterID': 2092}

In [26]:
len(fight_info['events'])

21

In [27]:
fight_info['events'][1]

{'timestamp': 0,
 'type': 'combatantinfo',
 'sourceID': 1,
 'specID': 104,
 'strength': 4400,
 'agility': 52870,
 'stamina': 199117,
 'intellect': 7328,
 'dodge': 5559,
 'parry': 0,
 'block': 0,
 'armor': 3017,
 'critMelee': 5559,
 'critRanged': 5559,
 'critSpell': 5559,
 'speed': 1437,
 'leech': 0,
 'hasteMelee': 8624,
 'hasteRanged': 8624,
 'hasteSpell': 8624,
 'avoidance': 2852,
 'mastery': 13172,
 'versatilityDamageDone': 11886,
 'versatilityHealingDone': 11886,
 'versatilityDamageReduction': 11886,
 'talents': [{'id': 155835, 'icon': 'spell_druid_bristlingfur.jpg'},
  {'id': 204012, 'icon': 'ability_druid_enrage.jpg'},
  {'id': 197488, 'icon': 'ability_druid_improvedmoonkinform.jpg'},
  {'id': 5211, 'icon': 'ability_druid_bash.jpg'},
  {'id': 203964, 'icon': 'spell_frost_iceclaw.jpg'},
  {'id': 203965, 'icon': 'ability_druid_enrage.jpg'},
  {'id': 204053, 'icon': 'ability_druid_swipe.jpg'}],
 'pvpTalents': [{'id': 208683, 'icon': 'ability_pvp_gladiatormedallion.jpg'},
  {'id': 207

In [45]:
damage = requests.get("https://www.warcraftlogs.com:443/v1/report/tables/'damage-taken'/VhmB61LqvrRQPFwJ?end=9201998?api_key=" + api_key)

In [46]:
damage.json()

{'status': 401, 'error': 'Invalid key specified.'}

<a id='gather'></a>
## Gather

### General Log Info

In [5]:
# Get all guild logs
guild_logs = requests.get("https://www.warcraftlogs.com:443/v1/reports/guild/Last%20Pull/Cenarius/US?api_key=" + api_key)

In [6]:
log_list = guild_logs.json()
log_list

[{'id': 'VhmB61LqvrRQPFwJ',
  'title': 'Mythic DEAD GUY WOOOOOOOO',
  'owner': 'Shadowbaine',
  'start': 1530152172272,
  'end': 1530161374270,
  'zone': 17},
 {'id': 'cR4g7wGY2z9VWbxH',
  'title': 'Mythic Argus ',
  'owner': 'Shadowbaine',
  'start': 1530064410476,
  'end': 1530075546810,
  'zone': 17},
 {'id': 'nRG1BJmD6fhC2vNq',
  'title': 'Mythic Bad Guy',
  'owner': 'Shadowbaine',
  'start': 1529632711055,
  'end': 1529644536361,
  'zone': 17},
 {'id': 'Tvrh8BgQL6y2qPza',
  'title': 'Mythic Argus',
  'owner': 'Shadowbaine',
  'start': 1529545941388,
  'end': 1529556954200,
  'zone': 17},
 {'id': 'Bqr8ZpYX6AV2cynW',
  'title': 'Mythic Argus',
  'owner': 'Shadowbaine',
  'start': 1529460514043,
  'end': 1529470825480,
  'zone': 17},
 {'id': 'qRpvXcKG6NAaDCJ4',
  'title': 'Mythic Argus',
  'owner': 'Shadowbaine',
  'start': 1529030268969,
  'end': 1529038771495,
  'zone': 17},
 {'id': '8QYh3aAXpgPLjydJ',
  'title': 'Mythic Argush',
  'owner': 'Shadowbaine',
  'start': 1528942270020,


First Antorus start: 1511926903194

In [7]:
# Get log ids for Antorus
log_ids = []
for log in log_list:
    if log['start'] >= 1511926903194:
        log_ids.append(log['id'])

In [30]:
log_ids

['VhmB61LqvrRQPFwJ',
 'cR4g7wGY2z9VWbxH',
 'nRG1BJmD6fhC2vNq',
 'Tvrh8BgQL6y2qPza',
 'Bqr8ZpYX6AV2cynW',
 'qRpvXcKG6NAaDCJ4',
 '8QYh3aAXpgPLjydJ',
 '2cVBtJTgC1KLn7Am',
 'rHCXbP8aVj72DQAY',
 'C8VnGm3qW9Ldzwvg',
 'cNbjQKh89RxWVPDp',
 '7JZQcG6tC4hfyX82',
 'R8kzbqTcYv4tw7rB',
 'b2Y4XRNw8ahjrPpW',
 'wdMvj2PVnQzafTYx',
 'pB8ftHNTh4gcKVFP',
 '1dApmwBHyQ2vcVTF',
 'kmq1ZFzBdDPNHjaK',
 '9MwfRvrtgmq38V4c',
 'p1wmbZXK4FNTfMjC',
 'bnCyMRDdKLaVrcJN',
 '1rVkmpa86TYCB9wL',
 'r1N7Taq3bDZPc6pm',
 'vVjYPmq6kxZtGDXA',
 '6VHgbZBGmwRWjp4Q',
 '3kXwT8yaYrQfK9W6',
 'bM9Cy3afr2XHBNp1',
 'Qn4HJycG2TxPKNbY',
 'Vx3ncMyNvF1HGjrR',
 'tZQFbvABVh9rfDHq',
 'vhr9WzGRXyqtCx4F',
 '3MP8dq4GAkxFgcNQ',
 'D6zV34J7FtgRcLBq',
 'WQHTpVLXJh2k9vnb',
 'vYDj1qQgcXWdmTnF',
 'a7Pqm1ZvfWhVY4Hn',
 'fKVA7k6QLMmF2yH4',
 'K4zJxrtGHL8vP9ap',
 'Q47z3KZ29tfGxCDW',
 'kL7rz9c3RaAtNPGx',
 'v4aXQJrC3W6ybj9Y',
 '1q8V234wXDhrRakC',
 'hcxWNVw3t98KYB1Z',
 'dqn3HbvfB8DCr6F1',
 'QVtkP4q3T2bLcZKJ',
 '26gRPafVjHzvLQFN',
 'KtCGWjYTqL918FxZ',
 'MzDx9yd6jJC

In [50]:
link = "https://www.warcraftlogs.com:443/v1/report/fights/" + log_ids[0] + "?api_key=" + api_key
link

'https://www.warcraftlogs.com:443/v1/report/fights/VhmB61LqvrRQPFwJ?api_key=0a60df4bb184d6c614fb96e0126bd407'

In [69]:
# Get all fight IDs for a particular log
log = requests.get(link)

In [70]:
log = log.json()
log

{'fights': [{'id': 1,
   'start_time': 0,
   'end_time': 595127,
   'boss': 2092,
   'size': 20,
   'difficulty': 5,
   'kill': False,
   'partial': 3,
   'bossPercentage': 1918,
   'fightPercentage': 1572,
   'lastPhaseForPercentageDisplay': 4,
   'name': 'Argus the Unmaker'},
  {'id': 2,
   'start_time': 721714,
   'end_time': 1260627,
   'boss': 2092,
   'size': 20,
   'difficulty': 5,
   'kill': False,
   'partial': 3,
   'bossPercentage': 2993,
   'fightPercentage': 2453,
   'lastPhaseForPercentageDisplay': 4,
   'name': 'Argus the Unmaker'},
  {'id': 3,
   'start_time': 1365349,
   'end_time': 1726732,
   'boss': 2092,
   'size': 20,
   'difficulty': 5,
   'kill': False,
   'partial': 3,
   'bossPercentage': 5094,
   'fightPercentage': 4175,
   'lastPhaseForPercentageDisplay': 4,
   'name': 'Argus the Unmaker'},
  {'id': 4,
   'start_time': 1812855,
   'end_time': 2368530,
   'boss': 2092,
   'size': 20,
   'difficulty': 5,
   'kill': False,
   'partial': 3,
   'bossPercentage': 

In [74]:
log_keys = list(log.keys())
log_keys

['fights',
 'lang',
 'friendlies',
 'enemies',
 'friendlyPets',
 'enemyPets',
 'phases',
 'title',
 'owner',
 'start',
 'end',
 'zone']

In [88]:
# Make file if it doesn't already exist
file_name = log_ids[0] + '_log_details.txt'
if not os.path.isfile(file_name):
    open(file_name, 'w').close()

In [93]:
# Save log data
data = {}
for key in ['fights', 'friendlies', 'enemies']:
    data[key] = []
    for entry in log[key]:
        data[key].append(entry)

with open(file_name, "w") as file:
    json.dump(data, file)

In [102]:
# Create files for all log info
for log_id in log_ids:
    link = "https://www.warcraftlogs.com:443/v1/report/fights/" + log_id + "?api_key=" + api_key
    log = requests.get(link)
    log = log.json()
    data = {}
    file_name = log_id + '_log_details.txt'
    if not os.path.isfile(file_name):
        open(file_name, 'w').close()
    for key in ['fights', 'friendlies', 'enemies']:
        data[key] = []
        for entry in log[key]:
            data[key].append(entry)
    with open(file_name, "w") as file:
        json.dump(data, file)

### Create df of general log info
Want info on log id, pulls, boss name & id, who was present

In [55]:
# Read in file
filename = 'log_details/' + log_ids[0] + '_log_details.txt'
with open(filename) as json_file:
    data = json.load(json_file)

In [56]:
data

{'fights': [{'id': 1,
   'start_time': 0,
   'end_time': 595127,
   'boss': 2092,
   'size': 20,
   'difficulty': 5,
   'kill': False,
   'partial': 3,
   'bossPercentage': 1918,
   'fightPercentage': 1572,
   'lastPhaseForPercentageDisplay': 4,
   'name': 'Argus the Unmaker'},
  {'id': 2,
   'start_time': 721714,
   'end_time': 1260627,
   'boss': 2092,
   'size': 20,
   'difficulty': 5,
   'kill': False,
   'partial': 3,
   'bossPercentage': 2993,
   'fightPercentage': 2453,
   'lastPhaseForPercentageDisplay': 4,
   'name': 'Argus the Unmaker'},
  {'id': 3,
   'start_time': 1365349,
   'end_time': 1726732,
   'boss': 2092,
   'size': 20,
   'difficulty': 5,
   'kill': False,
   'partial': 3,
   'bossPercentage': 5094,
   'fightPercentage': 4175,
   'lastPhaseForPercentageDisplay': 4,
   'name': 'Argus the Unmaker'},
  {'id': 4,
   'start_time': 1812855,
   'end_time': 2368530,
   'boss': 2092,
   'size': 20,
   'difficulty': 5,
   'kill': False,
   'partial': 3,
   'bossPercentage': 

In [57]:
data['fights']

[{'id': 1,
  'start_time': 0,
  'end_time': 595127,
  'boss': 2092,
  'size': 20,
  'difficulty': 5,
  'kill': False,
  'partial': 3,
  'bossPercentage': 1918,
  'fightPercentage': 1572,
  'lastPhaseForPercentageDisplay': 4,
  'name': 'Argus the Unmaker'},
 {'id': 2,
  'start_time': 721714,
  'end_time': 1260627,
  'boss': 2092,
  'size': 20,
  'difficulty': 5,
  'kill': False,
  'partial': 3,
  'bossPercentage': 2993,
  'fightPercentage': 2453,
  'lastPhaseForPercentageDisplay': 4,
  'name': 'Argus the Unmaker'},
 {'id': 3,
  'start_time': 1365349,
  'end_time': 1726732,
  'boss': 2092,
  'size': 20,
  'difficulty': 5,
  'kill': False,
  'partial': 3,
  'bossPercentage': 5094,
  'fightPercentage': 4175,
  'lastPhaseForPercentageDisplay': 4,
  'name': 'Argus the Unmaker'},
 {'id': 4,
  'start_time': 1812855,
  'end_time': 2368530,
  'boss': 2092,
  'size': 20,
  'difficulty': 5,
  'kill': False,
  'partial': 3,
  'bossPercentage': 3099,
  'fightPercentage': 2540,
  'lastPhaseForPercent

In [51]:
data['fights'][0]['difficulty']

5

In [54]:
data['fights'][0]['boss']

2092

In [58]:
# Collect fight info
df_list = []
for fight in data['fights']:
    df_list.append({
        'log_id': log_ids[0],
        'pull_id': fight['id'],
        'boss_id': fight['boss'],
        'boss_name': fight['name'],
        'difficulty': fight['difficulty']
    })

In [60]:
fight_data = pd.DataFrame(df_list, columns = ['log_id', 'pull_id', 'boss_id', 'boss_name', 'difficulty'])
fight_data

Unnamed: 0,log_id,pull_id,boss_id,boss_name,difficulty
0,qRpvXcKG6NAaDCJ4,1,2092,Argus the Unmaker,5
1,qRpvXcKG6NAaDCJ4,2,2092,Argus the Unmaker,5
2,qRpvXcKG6NAaDCJ4,3,2092,Argus the Unmaker,5
3,qRpvXcKG6NAaDCJ4,4,2092,Argus the Unmaker,5
4,qRpvXcKG6NAaDCJ4,5,2092,Argus the Unmaker,5
5,qRpvXcKG6NAaDCJ4,6,2092,Argus the Unmaker,5
6,qRpvXcKG6NAaDCJ4,7,2092,Argus the Unmaker,5
7,qRpvXcKG6NAaDCJ4,8,2092,Argus the Unmaker,5
8,qRpvXcKG6NAaDCJ4,9,2092,Argus the Unmaker,5
9,qRpvXcKG6NAaDCJ4,10,2092,Argus the Unmaker,5


In [17]:
data['friendlies']

[{'name': 'Withered Gift of the Lifebinder',
  'id': 66,
  'guid': 129386,
  'type': 'NPC',
  'fights': [{'id': 1, 'instances': 1, 'groups': 1},
   {'id': 2, 'instances': 1, 'groups': 3},
   {'id': 4, 'instances': 1, 'groups': 3},
   {'id': 5, 'instances': 1, 'groups': 2},
   {'id': 8, 'instances': 1, 'groups': 5},
   {'id': 10, 'instances': 1, 'groups': 2},
   {'id': 11, 'instances': 1, 'groups': 2},
   {'id': 13, 'instances': 1, 'groups': 2},
   {'id': 14, 'instances': 1, 'groups': 4}]},
 {'name': 'Vaelyra',
  'id': 20,
  'guid': 130076470,
  'type': 'DemonHunter',
  'fights': [{'id': 1},
   {'id': 2},
   {'id': 3},
   {'id': 4},
   {'id': 5},
   {'id': 6},
   {'id': 7},
   {'id': 8},
   {'id': 9},
   {'id': 10},
   {'id': 11},
   {'id': 12},
   {'id': 13},
   {'id': 14}]},
 {'name': 'Hati',
  'id': 25,
  'guid': 106551,
  'type': 'Pet',
  'fights': [{'id': 1, 'instances': 1}, {'id': 14, 'instances': 1}]},
 {'name': 'Psychodruid',
  'id': 18,
  'guid': 108734512,
  'type': 'Druid',
 

In [24]:
# Confirm types
type_list = []
for fight in data['friendlies']:
    type_list.append(fight['type'])

type_list

['NPC',
 'DemonHunter',
 'Pet',
 'Druid',
 'NPC',
 'Hunter',
 'Priest',
 'Hunter',
 'Pet',
 'NPC',
 'Warrior',
 'Paladin',
 'NPC',
 'Mage',
 'Paladin',
 'Priest',
 'Warrior',
 'NPC',
 'DeathKnight',
 'Warrior',
 'Warlock',
 'Druid',
 'Mage',
 'Mage',
 'Shaman',
 'Druid',
 'Druid']

In [30]:
for player in data['friendlies']:
    if player['type'] not in ['NPC', 'Pet']:
        print(player['name'])

Vaelyra
Psychodruid
Ayriea
Radiantldeal
Brian
Cagliostro
Uuglei
Metonymy
Acturus
Mythrose
Stradivarus
Divanance
Velryssa
Shadowbaine
Earrl
Tawñ
Petrol
Illestra
Shaami
Velsummers


In [32]:
for player in data['friendlies']:
    if player['type'] not in ['NPC', 'Pet']:
        for fight in player['fights']:
            print(player['name'])

Vaelyra
Vaelyra
Vaelyra
Vaelyra
Vaelyra
Vaelyra
Vaelyra
Vaelyra
Vaelyra
Vaelyra
Vaelyra
Vaelyra
Vaelyra
Vaelyra
Psychodruid
Psychodruid
Psychodruid
Psychodruid
Psychodruid
Psychodruid
Psychodruid
Psychodruid
Psychodruid
Psychodruid
Psychodruid
Psychodruid
Psychodruid
Psychodruid
Ayriea
Ayriea
Ayriea
Ayriea
Ayriea
Ayriea
Ayriea
Ayriea
Ayriea
Ayriea
Ayriea
Ayriea
Ayriea
Ayriea
Radiantldeal
Radiantldeal
Radiantldeal
Radiantldeal
Radiantldeal
Radiantldeal
Radiantldeal
Radiantldeal
Radiantldeal
Radiantldeal
Radiantldeal
Radiantldeal
Radiantldeal
Radiantldeal
Brian
Brian
Brian
Brian
Brian
Brian
Brian
Brian
Brian
Brian
Brian
Brian
Brian
Brian
Cagliostro
Cagliostro
Cagliostro
Cagliostro
Cagliostro
Cagliostro
Cagliostro
Cagliostro
Cagliostro
Cagliostro
Cagliostro
Cagliostro
Cagliostro
Cagliostro
Uuglei
Uuglei
Uuglei
Uuglei
Uuglei
Uuglei
Uuglei
Uuglei
Uuglei
Uuglei
Uuglei
Uuglei
Uuglei
Uuglei
Metonymy
Metonymy
Metonymy
Metonymy
Metonymy
Metonymy
Metonymy
Metonymy
Metonymy
Metonymy
Metonymy
Meton

In [27]:
data['friendlies'][0]['fights'][0]['id']

1

In [33]:
# Create list of players in each attempt
df_list = []
for player in data['friendlies']:
    if player['type'] not in ['NPC', 'Pet']:
        for fight in player['fights']:
            df_list.append({
                'pull_id': fight['id'],
                'player_name': player['name']
            })

In [34]:
player_data = pd.DataFrame(df_list, columns = ['pull_id', 'player_name'])
player_data

Unnamed: 0,pull_id,player_name
0,1,Vaelyra
1,2,Vaelyra
2,3,Vaelyra
3,4,Vaelyra
4,5,Vaelyra
5,6,Vaelyra
6,7,Vaelyra
7,8,Vaelyra
8,9,Vaelyra
9,10,Vaelyra


In [35]:
# Join two df's
df = fight_data.merge(player_data, how='left', on='pull_id')
df.head()

Unnamed: 0,log_id,pull_id,boss_id,boss_name,player_name
0,VhmB61LqvrRQPFwJ,1,2092,Argus the Unmaker,Vaelyra
1,VhmB61LqvrRQPFwJ,1,2092,Argus the Unmaker,Psychodruid
2,VhmB61LqvrRQPFwJ,1,2092,Argus the Unmaker,Ayriea
3,VhmB61LqvrRQPFwJ,1,2092,Argus the Unmaker,Radiantldeal
4,VhmB61LqvrRQPFwJ,1,2092,Argus the Unmaker,Brian


In [36]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 280 entries, 0 to 279
Data columns (total 5 columns):
log_id         280 non-null object
pull_id        280 non-null int64
boss_id        280 non-null int64
boss_name      280 non-null object
player_name    280 non-null object
dtypes: int64(2), object(3)
memory usage: 13.1+ KB


In [110]:
# Create empty df
df = pd.DataFrame([], columns = ['log_id', 'pull_id', 'boss_id', 'boss_name', 'difficulty', 'kill', 'player_name'])

# Read through all the files and create a final df fight summary info
for log_id in log_ids:
    # Open file
    filename = 'log_details/' + log_id + '_log_details.txt'
    with open(filename) as json_file:
        data = json.load(json_file)
    
    # Collect fight info
    df_list = []
    try:
        for fight in data['fights']:
            #print(fight['difficulty'])
            #break
            df_list.append({
                'log_id': log_id,
                'pull_id': fight['id'],
                'boss_id': fight['boss'],
                'boss_name': fight['name'],
                'difficulty': fight['difficulty'],
                'kill': fight['kill']
            })
    except KeyError:
        df_list.append({
                'log_id': log_id,
                'pull_id': fight['id'],
                'boss_id': fight['boss'],
                'boss_name': fight['name'],
                'difficulty': 'non-boss fight',
                'kill': 'non-boss fight'
            })
    # Convert to df
    fight_data = pd.DataFrame(df_list, columns = ['log_id', 'pull_id', 'boss_id', 'boss_name', 'difficulty', 'kill'])
    
    # Collect players for each attempt
    df_list = []
    for player in data['friendlies']:
        if player['type'] not in ['NPC', 'Pet']:
            for fight in player['fights']:
                df_list.append({
                    'pull_id': fight['id'],
                    'player_name': player['name']
                })
    # Convert to df
    player_data = pd.DataFrame(df_list, columns = ['pull_id', 'player_name'])
    
    # Merge df's
    merged_df = fight_data.merge(player_data, how='left', on='pull_id')
    
    # Add on to df
    df = pd.concat([df, merged_df])

In [95]:
df.head()

Unnamed: 0,log_id,pull_id,boss_id,boss_name,difficulty,kill,player_name
0,VhmB61LqvrRQPFwJ,1,2092,Argus the Unmaker,5,False,Vaelyra
1,VhmB61LqvrRQPFwJ,1,2092,Argus the Unmaker,5,False,Psychodruid
2,VhmB61LqvrRQPFwJ,1,2092,Argus the Unmaker,5,False,Ayriea
3,VhmB61LqvrRQPFwJ,1,2092,Argus the Unmaker,5,False,Radiantldeal
4,VhmB61LqvrRQPFwJ,1,2092,Argus the Unmaker,5,False,Brian


In [111]:
# Reorder the columns
col = ['log_id', 'pull_id', 'boss_id', 'boss_name', 'difficulty', 'kill', 'player_name']
df = df[col]

In [97]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 19258 entries, 0 to 52
Data columns (total 7 columns):
log_id         19258 non-null object
pull_id        19258 non-null object
boss_id        19258 non-null object
boss_name      19258 non-null object
difficulty     19258 non-null object
kill           19258 non-null object
player_name    19258 non-null object
dtypes: object(7)
memory usage: 1.2+ MB


In [98]:
df.nunique()

log_id         120
pull_id         42
boss_id         18
boss_name       36
difficulty       4
kill             3
player_name    147
dtype: int64

In [99]:
df.boss_name.unique()

array(['Argus the Unmaker', 'Garothi Worldbreaker', 'Aggramar',
       'Unstable Felshard', 'Felhounds of Sargeras',
       'Antoran High Command', 'Portal Keeper Hasabel',
       'The Defense of Eonar', 'Imonar the Soulhunter', "Kin'garoth",
       'Varimathras', 'The Coven of Shivarra', 'Dark Keeper Aedis',
       'Garothi Decimator', 'Riftworld Assistant', 'Garothi Annihilator',
       'Priestess of Delirium', 'Hulking Demolisher', 'Flameweaver',
       'Resilient Roach', 'Unknown', 'Garothi Demolisher', 'Dark Keeper',
       'Antoran Felguard', 'Bilescourge', 'Antoran Doomguard',
       'Bladesworn Ravager', 'Grand Magistrix Elisande', "Gul'dan",
       'Odyn', 'Guarm', 'Helya', 'Goroth', 'Fel-Charged Obfuscator',
       'Clubfist Beastlord', 'Slobbering Fiend'], dtype=object)

In [72]:
antorus_fights = ['Argus the Unmaker', 
                  'Garothi Worldbreaker', 
                  'Aggramar', 
                  'Felhounds of Sargeras', 
                  'Antoran High Command', 
                  'Portal Keeper Hasabel', 
                  'The Defense of Eonar', 
                  'Imonar the Soulhunter', 
                  "Kin'garoth", 
                  'Varimathras', 
                  'The Coven of Shivarra']

In [112]:
# Collect only Argus bosses
df = df[df.boss_name.isin(antorus_fights)]

In [75]:
df.difficulty.unique()

array([5, 'non-boss fight', 4, 3], dtype=object)

In [113]:
# Collect only mythic difficulty
df = df.query('difficulty == 5')

In [102]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15250 entries, 0 to 19
Data columns (total 7 columns):
log_id         15250 non-null object
pull_id        15250 non-null object
boss_id        15250 non-null object
boss_name      15250 non-null object
difficulty     15250 non-null object
kill           15250 non-null object
player_name    15250 non-null object
dtypes: object(7)
memory usage: 953.1+ KB


In [103]:
# Check player names
df.player_name.unique()

array(['Vaelyra', 'Psychodruid', 'Ayriea', 'Radiantldeal', 'Brian',
       'Cagliostro', 'Uuglei', 'Metonymy', 'Acturus', 'Mythrose',
       'Stradivarus', 'Divanance', 'Velryssa', 'Shadowbaine', 'Earrl',
       'Tawñ', 'Petrol', 'Illestra', 'Shaami', 'Velsummers', 'Eleint',
       'Tinytiki', 'Daffy', 'Kamer', 'Shallowfall', 'Demonhoney',
       'Èllipses', 'Au', 'Future', 'Pulsè', 'Undertakerop',
       'Essence of Eonar', 'Kyarrix', 'Yoked', 'Bryiah', 'Zephyyra',
       'Znoch', 'Thebadlock', 'Brianmurican', 'Googboog', 'Elzam',
       'Cinzia'], dtype=object)

In [82]:
len(df.player_name.unique())

41

In [114]:
# Remove Eonar!
df = df.query('player_name != "Essence of Eonar"')

In [105]:
df.nunique()

log_id         55
pull_id        42
boss_id        11
boss_name      11
difficulty      1
kill            2
player_name    41
dtype: int64

In [106]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15231 entries, 0 to 19
Data columns (total 7 columns):
log_id         15231 non-null object
pull_id        15231 non-null object
boss_id        15231 non-null object
boss_name      15231 non-null object
difficulty     15231 non-null object
kill           15231 non-null object
player_name    15231 non-null object
dtypes: object(7)
memory usage: 951.9+ KB


In [107]:
df.kill.unique()

array([False, True], dtype=object)

In [115]:
# Change kill to bool
df.kill = df.kill.astype('bool')

In [116]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15231 entries, 0 to 19
Data columns (total 7 columns):
log_id         15231 non-null object
pull_id        15231 non-null object
boss_id        15231 non-null object
boss_name      15231 non-null object
difficulty     15231 non-null object
kill           15231 non-null bool
player_name    15231 non-null object
dtypes: bool(1), object(6)
memory usage: 847.8+ KB


In [117]:
df

Unnamed: 0,log_id,pull_id,boss_id,boss_name,difficulty,kill,player_name
0,VhmB61LqvrRQPFwJ,1,2092,Argus the Unmaker,5,False,Vaelyra
1,VhmB61LqvrRQPFwJ,1,2092,Argus the Unmaker,5,False,Psychodruid
2,VhmB61LqvrRQPFwJ,1,2092,Argus the Unmaker,5,False,Ayriea
3,VhmB61LqvrRQPFwJ,1,2092,Argus the Unmaker,5,False,Radiantldeal
4,VhmB61LqvrRQPFwJ,1,2092,Argus the Unmaker,5,False,Brian
5,VhmB61LqvrRQPFwJ,1,2092,Argus the Unmaker,5,False,Cagliostro
6,VhmB61LqvrRQPFwJ,1,2092,Argus the Unmaker,5,False,Uuglei
7,VhmB61LqvrRQPFwJ,1,2092,Argus the Unmaker,5,False,Metonymy
8,VhmB61LqvrRQPFwJ,1,2092,Argus the Unmaker,5,False,Acturus
9,VhmB61LqvrRQPFwJ,1,2092,Argus the Unmaker,5,False,Mythrose


In [118]:
df.to_csv('master_list.csv', index=False)