In [8]:
import json
import pandas as pd

I stored my match data in `bjjheroes_matches.json` as a series of nested dictionaries, but that has turned out to be a convoluted way to organize it. I want the data to fit into a DataFrame without having to dig through a set of nested dictionaries.

In [9]:
# First look at the raw file: read_json leaves each fighter's matches nested in one column.
df = pd.read_json(path_or_buf='bjjheroes_matches.json')
df.head(n=5)

Unnamed: 0,matches,name
0,"[{'method': 'Inside heel hook', 'outcome': 'L'...",Aaron Johnson
1,"[{'method': 'Referee Decision', 'outcome': 'L'...",Abdurakhman Bilarov
2,"[{'method': 'Adv', 'outcome': 'L', 'weight': '...",Abraham Marte
3,"[{'method': 'Referee Decision', 'outcome': 'L'...",Adam Benayoun
4,"[{'method': 'RNC', 'outcome': 'L', 'weight': '...",Adam Ferrara


I'm going to make it a little neater by opening the matches list for each fighter and adding their name onto each individual match dictionary. First I'm going to open the JSON file and set it as data.

In [10]:
# Load the raw JSON with the standard library so the nested structure is kept as
# plain Python lists and dicts. The encoding is given explicitly because open()
# otherwise falls back to the platform's locale encoding, which can fail on or
# garble any non-ASCII text on systems where that default is not UTF-8.
with open('bjjheroes_matches.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

Now I'm going to take the first element as a test and figure out how to add their name to each of their matches.

In [11]:
# Inspect the 'matches' entry of the first element: a list of match dictionaries.
data[0]['matches']

[{'method': 'Inside heel hook',
  'outcome': 'L',
  'weight': 'ABS',
  'year': '2015'},
 {'method': 'RNC', 'outcome': 'L', 'weight': '94KG', 'year': '2015'},
 {'method': 'Heel hook', 'outcome': 'L', 'weight': 'ABS', 'year': '2015'},
 {'method': 'Points', 'outcome': 'L', 'weight': '94KG', 'year': '2016'},
 {'method': 'N/A', 'outcome': 'L', 'weight': '94KG', 'year': '2016'},
 {'method': 'Cross choke', 'outcome': 'L', 'weight': 'HWABS', 'year': '2016'},
 {'method': 'Choke', 'outcome': 'L', 'weight': 'ABS', 'year': '2016'},
 {'method': 'Triangle', 'outcome': 'L', 'weight': '100KG', 'year': '2016'},
 {'method': 'Triangle armbar',
  'outcome': 'L',
  'weight': 'ABS',
  'year': '2016'},
 {'method': 'Choke from back',
  'outcome': 'L',
  'weight': 'ABS',
  'year': '2016'},
 {'method': 'Points', 'outcome': 'L', 'weight': 'ABS', 'year': '2016'},
 {'method': 'Inside heel hook',
  'outcome': 'L',
  'weight': '84KG',
  'year': '2016'},
 {'method': 'Arm in guillotine',
  'outcome': 'L',
  'weight': 

Maybe I can iterate through each element of the matches list and add the name attached to the key 'name'. I'll go ahead and set this to a new variable instead of overwriting the old one. We'll call it `test_matches`.

In [12]:
# Build new dictionaries rather than aliasing the originals: `new_item = item` would
# only bind a second name to the same dict, so adding 'name' through it would
# silently modify the dictionaries stored inside `data` as well.
test_matches = [
    {**item, 'name': data[0]['name']}  # copy of the match plus the fighter's name
    for item in data[0]['matches']
]
test_matches[:2]

[{'method': 'Inside heel hook',
  'outcome': 'L',
  'weight': 'ABS',
  'year': '2015',
  'name': 'Aaron Johnson'},
 {'method': 'RNC',
  'outcome': 'L',
  'weight': '94KG',
  'year': '2015',
  'name': 'Aaron Johnson'}]

Cool, now let's see if we can do that for all of them. I'll rewrite the above code as a function.

In [13]:
def fix_matches_list(fighter_dict):
    """Return the fighter's matches with the fighter's name added to each one.

    Args:
        fighter_dict: Mapping with a 'matches' entry (an iterable of match
            dictionaries) and a 'name' entry.

    Returns:
        A new list of new dictionaries, one per match, each holding the
        original match's items plus a 'name' key set to fighter_dict['name'].
        The dictionaries inside fighter_dict['matches'] are not modified.

    Raises:
        KeyError: If 'matches' is missing, or if 'name' is missing while there
            is at least one match to label.
    """
    # Copy each match instead of binding a second name to the same dict, so the
    # caller's data is left untouched and re-running the cell is idempotent.
    return [
        {**match, 'name': fighter_dict['name']}
        for match in fighter_dict['matches']
    ]

# Sanity check: the function's result for the first fighter should equal the hand-built list.
fix_matches_list(data[0]) == test_matches

True

Now that we have that working we can apply it to every fighter in the list.

In [14]:
# Run the per-fighter helper over the whole dataset: one list of matches per fighter.
new_data = list(map(fix_matches_list, data))
# Spot-check the first three matches at both ends of the result.
for fighter_matches in (new_data[0], new_data[-1]):
    print(fighter_matches[:3])

[{'method': 'Inside heel hook', 'outcome': 'L', 'weight': 'ABS', 'year': '2015', 'name': 'Aaron Johnson'}, {'method': 'RNC', 'outcome': 'L', 'weight': '94KG', 'year': '2015', 'name': 'Aaron Johnson'}, {'method': 'Heel hook', 'outcome': 'L', 'weight': 'ABS', 'year': '2015', 'name': 'Aaron Johnson'}]
[{'method': 'N/A', 'outcome': 'L', 'weight': '82KG', 'year': '2016', 'name': 'Vinicius Garcia'}, {'method': 'Armbar', 'outcome': 'L', 'weight': '82KG', 'year': '2016', 'name': 'Vinicius Garcia'}, {'method': 'Points', 'outcome': 'L', 'weight': '82KG', 'year': '2016', 'name': 'Vinicius Garcia'}]


The only downside to this is that the matches are not all in the same list; each fighter's matches are in their own list. Just out of curiosity, I'll see what happens when I try turning this into a DataFrame.

In [15]:
# Experiment: pass the list of per-fighter lists straight to the DataFrame constructor.
test_df = pd.DataFrame(data=new_data)
test_df.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,283,284,285,286,287,288,289,290,291,292
0,"{'method': 'Inside heel hook', 'outcome': 'L',...","{'method': 'RNC', 'outcome': 'L', 'weight': '9...","{'method': 'Heel hook', 'outcome': 'L', 'weigh...","{'method': 'Points', 'outcome': 'L', 'weight':...","{'method': 'N/A', 'outcome': 'L', 'weight': '9...","{'method': 'Cross choke', 'outcome': 'L', 'wei...","{'method': 'Choke', 'outcome': 'L', 'weight': ...","{'method': 'Triangle', 'outcome': 'L', 'weight...","{'method': 'Triangle armbar', 'outcome': 'L', ...","{'method': 'Choke from back', 'outcome': 'L', ...",...,,,,,,,,,,
1,"{'method': 'Referee Decision', 'outcome': 'L',...","{'method': 'Pts: 8x2', 'outcome': 'L', 'weight...","{'method': 'Referee Decision', 'outcome': 'L',...","{'method': 'DQ', 'outcome': 'L', 'weight': '94...","{'method': 'Pts: 5x0', 'outcome': 'L', 'weight...","{'method': 'Arm in guillotine', 'outcome': 'L'...","{'method': 'Referee Decision', 'outcome': 'L',...","{'method': 'Points', 'outcome': 'L', 'weight':...","{'method': 'Kneebar', 'outcome': 'L', 'weight'...","{'method': 'Pts: 5x0', 'outcome': 'L', 'weight...",...,,,,,,,,,,
2,"{'method': 'Adv', 'outcome': 'L', 'weight': 'O...","{'method': 'Kneebar', 'outcome': 'L', 'weight'...","{'method': 'DQ', 'outcome': 'L', 'weight': 'O1...","{'method': 'Estima lock', 'outcome': 'L', 'wei...","{'method': 'Choke from back', 'outcome': 'L', ...","{'method': 'Armbar', 'outcome': 'L', 'weight':...","{'method': 'wristlock', 'outcome': 'L', 'weigh...","{'method': 'Armbar', 'outcome': 'L', 'weight':...","{'method': 'Pts: 2x0', 'outcome': 'L', 'weight...","{'method': 'Pts: 3x0', 'outcome': 'L', 'weight...",...,,,,,,,,,,
3,"{'method': 'Referee Decision', 'outcome': 'L',...","{'method': 'Points', 'outcome': 'L', 'weight':...","{'method': 'Pts: 2x2, Adv', 'outcome': 'L', 'w...","{'method': 'Pts: 8x6', 'outcome': 'L', 'weight...","{'method': 'Arm in Ezekiel', 'outcome': 'L', '...","{'method': 'Points', 'outcome': 'L', 'weight':...","{'method': 'Kneebar', 'outcome': 'L', 'weight'...","{'method': 'Pts: 9x4', 'outcome': 'L', 'weight...","{'method': 'Choke from back', 'outcome': 'L', ...","{'method': 'Referee Decision', 'outcome': 'L',...",...,,,,,,,,,,
4,"{'method': 'RNC', 'outcome': 'L', 'weight': '6...","{'method': 'Pts: 4x2', 'outcome': 'L', 'weight...","{'method': 'Choke from back', 'outcome': 'L', ...","{'method': 'Adv', 'outcome': 'L', 'weight': '6...","{'method': 'Pts: 2x2, Adv', 'outcome': 'L', 'w...","{'method': 'Referee Decision', 'outcome': 'L',...","{'method': 'Pts: 0x0, Adv', 'outcome': 'L', 'w...","{'method': 'Referee Decision', 'outcome': 'L',...","{'method': 'Points', 'outcome': 'L', 'weight':...","{'method': 'Kneebar', 'outcome': 'L', 'weight'...",...,,,,,,,,,,


This is absolutely not what I wanted, so let's combine all of these lists and see what kind of dataframe we get there.

In [16]:
# Flatten the per-fighter lists into a single list with one entry per match.
combined_list = [match for fighter_matches in new_data for match in fighter_matches]

# Build the frame from the flat list of match dictionaries and preview it.
new_df = pd.DataFrame(data=combined_list)
new_df.head(n=5)

Unnamed: 0,method,outcome,weight,year,name
0,Inside heel hook,L,ABS,2015,Aaron Johnson
1,RNC,L,94KG,2015,Aaron Johnson
2,Heel hook,L,ABS,2015,Aaron Johnson
3,Points,L,94KG,2016,Aaron Johnson
4,,L,94KG,2016,Aaron Johnson


This is a much better format and should allow me to manipulate this data much more efficiently than before. I'm going to save this as a CSV for now and move on to reshaping this data so I can visualize it.

In [17]:
# index=True is the to_csv default, spelled out here: the row index is written as the
# first column of the file, so a later read needs to account for it.
new_df.to_csv(path_or_buf='bjjheroes_matches.csv', index=True)