# Practice your skills loading data from a CSV file
In this exercise, you will load a CSV file and you'll perform some operations on it to extract data. If you know how to do this with the Pandas library, you can use that. If not, you can use the csv library with the ready-to-use example this notebook provides.

In [1]:
import pandas as pd 

In [9]:
df = pd.read_csv("sample_data/wine-ratings-small.csv")
df.head(20)

Unnamed: 0.1,Unnamed: 0,name,grape,region,variety,rating,notes
0,0,Laurenz V Charming Gruner Veltliner 2013,,"Kamptal, Austria",White Wine,90.0,Aromas of ripe apples and a typical Veltliner ...
1,1,Laurenz V Charming Gruner Veltliner 2014,,"Kamptal, Austria",White Wine,90.0,Aromas of ripe apples and a typical Veltliner ...
2,2,Laurenz V Singing Gruner Veltliner 2007,,Austria,White Wine,90.0,"A very attractive fruit bouquet yields apple, ..."
3,3,Laurenz V Singing Gruner Veltliner 2010,,Austria,White Wine,88.0,"A very attractive fruit bouquet yields apple, ..."
4,4,Laurenz V Singing Gruner Veltliner 2011,,Austria,White Wine,88.0,"A very attractive fruit bouquet yields apple, ..."
5,5,Laurenz V Singing Gruner Veltliner 2013,,Austria,White Wine,89.0,"A very attractive fruit bouquet yields apple, ..."
6,6,Lava Cap American River Red,,"El Dorado, Sierra Foothills, California",Red Wine,90.0,This wine was created as a table wine. We want...
7,7,Lava Cap Barbera 2010,,"Sierra Foothills, California",Red Wine,90.0,"The plump, rich cherry, raspberry and plum fru..."
8,8,Lava Cap Battonage Chardonnay 2012,,"Sierra Foothills, California",White Wine,91.0,This perfectly balanced wine begins with invit...
9,9,Lava Cap Cabernet Sauvignon 2013,,"El Dorado, Sierra Foothills, California",Red Wine,90.0,Lava Cap’s estate grown Cabernet Sauvignon gra...


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 780 entries, 0 to 779
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  780 non-null    int64  
 1   name        780 non-null    object 
 2   grape       0 non-null      float64
 3   region      780 non-null    object 
 4   variety     777 non-null    object 
 5   rating      780 non-null    float64
 6   notes       775 non-null    object 
dtypes: float64(2), int64(1), object(4)
memory usage: 42.8+ KB


In [6]:
wines_type = df['variety']
wines_type.head()

0    White Wine
1    White Wine
2    White Wine
3    White Wine
4    White Wine
Name: variety, dtype: object

In [7]:
wines_type.unique()

array(['White Wine', 'Red Wine', 'Sparkling & Champagne', 'Pink and Rosé',
       nan, 'Collectible'], dtype=object)

In [8]:
wines_type.nunique()

5

In [11]:
df['grapes'] = df['grape'].fillna(0).round(2)
df.head()

Unnamed: 0.1,Unnamed: 0,name,grape,region,variety,rating,notes,grapes
0,0,Laurenz V Charming Gruner Veltliner 2013,,"Kamptal, Austria",White Wine,90.0,Aromas of ripe apples and a typical Veltliner ...,0.0
1,1,Laurenz V Charming Gruner Veltliner 2014,,"Kamptal, Austria",White Wine,90.0,Aromas of ripe apples and a typical Veltliner ...,0.0
2,2,Laurenz V Singing Gruner Veltliner 2007,,Austria,White Wine,90.0,"A very attractive fruit bouquet yields apple, ...",0.0
3,3,Laurenz V Singing Gruner Veltliner 2010,,Austria,White Wine,88.0,"A very attractive fruit bouquet yields apple, ...",0.0
4,4,Laurenz V Singing Gruner Veltliner 2011,,Austria,White Wine,88.0,"A very attractive fruit bouquet yields apple, ...",0.0


In [12]:
df.drop(['grape'], axis=1)

Unnamed: 0.1,Unnamed: 0,name,region,variety,rating,notes,grapes
0,0,Laurenz V Charming Gruner Veltliner 2013,"Kamptal, Austria",White Wine,90.0,Aromas of ripe apples and a typical Veltliner ...,0.0
1,1,Laurenz V Charming Gruner Veltliner 2014,"Kamptal, Austria",White Wine,90.0,Aromas of ripe apples and a typical Veltliner ...,0.0
2,2,Laurenz V Singing Gruner Veltliner 2007,Austria,White Wine,90.0,"A very attractive fruit bouquet yields apple, ...",0.0
3,3,Laurenz V Singing Gruner Veltliner 2010,Austria,White Wine,88.0,"A very attractive fruit bouquet yields apple, ...",0.0
4,4,Laurenz V Singing Gruner Veltliner 2011,Austria,White Wine,88.0,"A very attractive fruit bouquet yields apple, ...",0.0
...,...,...,...,...,...,...,...
775,775,Lewis Cellars Syrah Ethan's Blend 2005,California,Red Wine,92.0,"Now 7 years old, Alec's younger brother Ethan ...",0.0
776,776,Lewis Cellars Syrah Ethan's Blend 2014,California,Red Wine,93.0,"Born in 2000, everything about Ethan and his n...",0.0
777,777,Lewis Cellars Syrah Ethan's Blend 2009,California,Red Wine,94.0,"Positioned between brothers, Alec and Mason, E...",0.0
778,778,Lexington Apex Red 2011,"Santa Cruz Mountains, California",Red Wine,91.0,"Blend: 99% Cabernet Sauvignon, 1% Merlot",0.0


In [13]:
region = df['region']
region.head()

0    Kamptal, Austria
1    Kamptal, Austria
2             Austria
3             Austria
4             Austria
Name: region, dtype: object

In [14]:
region.unique()

array(['Kamptal, Austria', 'Austria',
       'El Dorado, Sierra Foothills, California',
       'Sierra Foothills, California', 'California',
       'Knights Valley, Sonoma County, California',
       'Napa Valley, California', 'Chateauneuf-du-Pape, Rhone, France',
       'Gigondas, Rhone, France', 'Rasteau, Rhone, France',
       'Paso Robles, Central Coast, California',
       'Central Coast, California',
       'Willow Creek District, Paso Robles, Central Coast, California',
       'Eola-Amity Hills, Willamette Valley, Oregon',
       'Yamhill-Carlton District, Willamette Valley, Oregon',
       'Dundee Hills, Willamette Valley, Oregon',
       'Willamette Valley, Oregon', 'Portugal', 'New Zealand',
       'Marlborough, New Zealand', 'Spain', 'Argentina', 'Italy',
       'South Australia, Australia', 'Tuscany, Italy', 'South Africa',
       'Champagne, France', 'St. Emilion, Bordeaux, France',
       'Sicily, Italy', 'Rhone, France', 'Montalcino, Tuscany, Italy',
       'France', 'Ca

In [15]:
region.nunique()

86

In [17]:
rate = df['rating']
rate.info()

<class 'pandas.core.series.Series'>
RangeIndex: 780 entries, 0 to 779
Series name: rating
Non-Null Count  Dtype  
--------------  -----  
780 non-null    float64
dtypes: float64(1)
memory usage: 6.2 KB


In [23]:
rate.describe().round(2)

count    780.00
mean      91.42
std        1.93
min       85.00
25%       90.00
50%       91.00
75%       93.00
max       99.00
Name: rating, dtype: float64

In [39]:
rating_betn = df.loc[(df['rating'] > 89) & (df['rating'] < 93), ['rating','name']] 

In [40]:
print(rating_betn)

     rating                                          name
0      90.0      Laurenz V Charming Gruner Veltliner 2013
1      90.0      Laurenz V Charming Gruner Veltliner 2014
2      90.0       Laurenz V Singing Gruner Veltliner 2007
6      90.0                   Lava Cap American River Red
7      90.0                         Lava Cap Barbera 2010
..      ...                                           ...
773    92.0          Lewis Cellars Sonoma Chardonnay 2017
774    91.0   Lewis Cellars Sonoma Valley Chardonnay 2008
775    92.0        Lewis Cellars Syrah Ethan's Blend 2005
778    91.0                       Lexington Apex Red 2011
779    91.0  Lexington Gist Ranch Cabernet Sauvignon 2011

[514 rows x 2 columns]


In [42]:
rating_betn.count()

rating    514
name      514
dtype: int64

In [48]:
df.drop(['grape'], axis=1, inplace=True)

In [51]:
df = df.where(pd.notnull(df), None)

In [52]:
df

Unnamed: 0.1,Unnamed: 0,name,region,variety,rating,notes,grapes
0,0,Laurenz V Charming Gruner Veltliner 2013,"Kamptal, Austria",White Wine,90.0,Aromas of ripe apples and a typical Veltliner ...,0.0
1,1,Laurenz V Charming Gruner Veltliner 2014,"Kamptal, Austria",White Wine,90.0,Aromas of ripe apples and a typical Veltliner ...,0.0
2,2,Laurenz V Singing Gruner Veltliner 2007,Austria,White Wine,90.0,"A very attractive fruit bouquet yields apple, ...",0.0
3,3,Laurenz V Singing Gruner Veltliner 2010,Austria,White Wine,88.0,"A very attractive fruit bouquet yields apple, ...",0.0
4,4,Laurenz V Singing Gruner Veltliner 2011,Austria,White Wine,88.0,"A very attractive fruit bouquet yields apple, ...",0.0
...,...,...,...,...,...,...,...
775,775,Lewis Cellars Syrah Ethan's Blend 2005,California,Red Wine,92.0,"Now 7 years old, Alec's younger brother Ethan ...",0.0
776,776,Lewis Cellars Syrah Ethan's Blend 2014,California,Red Wine,93.0,"Born in 2000, everything about Ethan and his n...",0.0
777,777,Lewis Cellars Syrah Ethan's Blend 2009,California,Red Wine,94.0,"Positioned between brothers, Alec and Mason, E...",0.0
778,778,Lexington Apex Red 2011,"Santa Cruz Mountains, California",Red Wine,91.0,"Blend: 99% Cabernet Sauvignon, 1% Merlot",0.0


In [53]:
import json

with open("wine_data.json", "w") as f:
    json.dump(df.to_dict(orient="records"), f)
