Notebook that loads data from a wine-ratings CSV file, filters it, and converts the result to JSON format

In [1]:
import pandas as pd

# Read the sample wine ratings into a DataFrame, then report its column
# labels followed by its (rows, columns) shape.
csv_path = './sample_data/wine-ratings-small.csv'
wine_df = pd.read_csv(csv_path)
for summary in (wine_df.columns, wine_df.shape):
    print(summary)

Index(['Unnamed: 0', 'name', 'grape', 'region', 'variety', 'rating', 'notes'], dtype='object')
(780, 7)


In [2]:
# Select a subset: Italian red wines rated 95 or higher.
# One-liner alternative with all conditions inline:
#temp_df = wine_df[(wine_df['variety']=='Red Wine') & (wine_df['rating']>=95) & (wine_df['region'].str.contains('Italy'))].copy()
# Or, build one named boolean mask per condition and combine them
variety_mask = wine_df['variety'] == 'Red Wine'
rating_mask = wine_df['rating'] >= 95
# na=False: a row whose region is missing counts as "not Italy" rather than NaN
region_mask = wine_df['region'].str.contains('Italy', na=False)
# reset_index() returns a new frame (the old row labels are kept in an 'index'
# column), so nothing is mutated in place on a filtered slice of wine_df.
temp_df = wine_df[variety_mask & rating_mask & region_mask].reset_index()
print(f'# in subset = {len(temp_df)}')
temp_df.head()

# in subset = 5


Unnamed: 0.1,index,Unnamed: 0,name,grape,region,variety,rating,notes
0,119,119,Le Chiuse Brunello di Montalcino 2013,,"Montalcino, Tuscany, Italy",Red Wine,96.0,Ruby red in color with light granite hues. The...
1,120,120,Le Chiuse Brunello di Montalcino 2015,,"Montalcino, Tuscany, Italy",Red Wine,95.0,Ruby red in color with light granite hues. The...
2,122,122,Le Chiuse Brunello di Montalcino (375ML half-b...,,"Montalcino, Tuscany, Italy",Red Wine,96.0,Ruby red in color with light granite hues. The...
3,222,222,Le Potazzine Gorelli Brunello di Montalcino 2012,,"Montalcino, Tuscany, Italy",Red Wine,96.0,Bright red garnet color and shiny. Strong olfa...
4,261,261,Le Salette Pergole Vece Amarone 2015,,"Veneto, Italy",Red Wine,95.0,Ruby red with violet reflections. Intense and ...


In [25]:
# Serialize only the columns of interest to a JSON string.
# Called without arguments, to_json() nests the data as
# {column: {row label: value}}; passing orient='records' would emit one
# object per row instead.
selected_columns = ['name', 'region', 'variety', 'rating']
json_out = temp_df[selected_columns].to_json()
print(f'type = {type(json_out)}')
json_out

type = <class 'str'>


'{"name":{"0":"Le Chiuse Brunello di Montalcino 2013","1":"Le Chiuse Brunello di Montalcino 2015","2":"Le Chiuse Brunello di Montalcino (375ML half-bottle) 2013","3":"Le Potazzine Gorelli Brunello di Montalcino 2012","4":"Le Salette Pergole Vece Amarone 2015"},"region":{"0":"Montalcino, Tuscany, Italy","1":"Montalcino, Tuscany, Italy","2":"Montalcino, Tuscany, Italy","3":"Montalcino, Tuscany, Italy","4":"Veneto, Italy"},"variety":{"0":"Red Wine","1":"Red Wine","2":"Red Wine","3":"Red Wine","4":"Red Wine"},"rating":{"0":96.0,"1":95.0,"2":96.0,"3":96.0,"4":95.0}}'

In [26]:
# Save to a new JSON file with `json.dump()` (note: no trailing 's' --
# this variant writes to a file object rather than returning a string).
# NOTE(review): `json_out` is already a JSON string, so the file ends up
# holding a JSON-encoded *string*; the reading cell has to decode it twice.
import json

with open('sample_data/wine-filtered.json', mode='w') as out_file:
    json.dump(json_out, out_file)

In [27]:
# Read the file back with `json.load()` (no 's': it parses from a file object,
# unlike `json.loads()`, which parses from a string).
with open('sample_data/wine-filtered.json') as in_file:
    encoded_text = json.load(in_file)
# The file held a JSON-encoded string, so the first pass yields a str.
# Decode that string into Python objects.
# mnemonic: loads -> LOAD from String
loaded_json = json.loads(encoded_text)
print(loaded_json.keys())
print(f"Number of items: {len(loaded_json['name'])}")

dict_keys(['name', 'region', 'variety', 'rating'])
Number of items: 5


In [31]:
# Display the 'name' entry: a mapping from row label to wine name.
loaded_json['name']

{'0': 'Le Chiuse Brunello di Montalcino 2013',
 '1': 'Le Chiuse Brunello di Montalcino 2015',
 '2': 'Le Chiuse Brunello di Montalcino (375ML half-bottle) 2013',
 '3': 'Le Potazzine Gorelli Brunello di Montalcino 2012',
 '4': 'Le Salette Pergole Vece Amarone 2015'}

In [32]:
# Look up a single wine by its row label. The labels are strings after the
# JSON round trip, so the key is '1' rather than the integer 1.
loaded_json['name']['1']

'Le Chiuse Brunello di Montalcino 2015'