In [1]:
import pandas as pd
import seaborn as sns

### Read in the public art data, drop the `Mapped Location` column and rename columns to lower case without spaces

In [2]:
art = pd.read_csv('../data/public_art.csv')
art.head(2)

Unnamed: 0,Title,Last Name,First Name,Location,Medium,Type,Description,Latitude,Longitude,Mapped Location
0,[Cross Country Runners],Frost,Miley,"4001 Harding Rd., Nashville TN",Bronze,Sculpture,,36.12856,-86.8366,"(36.12856, -86.8366)"
1,[Fourth and Commerce Sculpture],Walker,Lin,"333 Commerce Street, Nashville TN",,Sculpture,,36.16234,-86.77774,"(36.16234, -86.77774)"


In [3]:
art = art.drop(columns = ['Mapped Location'])
art = art.rename(columns = {'Title': 'title', 'Last Name': 'last_name', 
                            'First Name': 'first_name', 'Location': 'loc', 
                            'Medium': 'medium',  'Type': 'art_type', 'Description': 'desc', 
                            'Latitude': 'lat', 'Longitude': 'lng'})

### More exploration with pandas

 - .isnull.sum()
 - .value_counts()
 - .to_frame()
 - .reset_index()
 - .replace()
 - .describe()
 

In [4]:
art.head(2)

Unnamed: 0,title,last_name,first_name,loc,medium,art_type,desc,lat,lng
0,[Cross Country Runners],Frost,Miley,"4001 Harding Rd., Nashville TN",Bronze,Sculpture,,36.12856,-86.8366
1,[Fourth and Commerce Sculpture],Walker,Lin,"333 Commerce Street, Nashville TN",,Sculpture,,36.16234,-86.77774


#### Are there missing values in the `art` dataframe?

In [5]:
art.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 132 entries, 0 to 131
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   title       132 non-null    object 
 1   last_name   132 non-null    object 
 2   first_name  122 non-null    object 
 3   loc         131 non-null    object 
 4   medium      128 non-null    object 
 5   art_type    132 non-null    object 
 6   desc        87 non-null     object 
 7   lat         132 non-null    float64
 8   lng         132 non-null    float64
dtypes: float64(2), object(7)
memory usage: 9.4+ KB


In [6]:
art.isnull().sum()

title          0
last_name      0
first_name    10
loc            1
medium         4
art_type       0
desc          45
lat            0
lng            0
dtype: int64

#### Determine how many of each art type exists in the `art` dataframe

In [7]:
art.art_type.value_counts()

Sculpture             61
Mural                 38
Monument              16
Mosaic                 2
Various                2
Frieze                 2
Mobile                 2
Sculpture/Fountain     1
Bronzes                1
Relief                 1
Stained Glass          1
Street Art             1
mural                  1
Multipart              1
Furniture              1
Fountain               1
Name: art_type, dtype: int64

#### Save the counts of each type to a new variable - is the new structure a dataframe?

In [8]:
type_counts = art.art_type.value_counts()

In [9]:
type(type_counts)

pandas.core.series.Series

In [10]:
#make series a df
type_counts = type_counts.to_frame()

print(type(type_counts))
type_counts.head(2)

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,art_type
Sculpture,61
Mural,38


In [20]:
#reset index resets to 0-based index and moves existing index to a column
type_counts = type_counts.reset_index()
type_counts.head(5)

Unnamed: 0,index,art_type,count
0,0,Sculpture,61
1,1,Mural,38
2,2,Monument,16
3,3,Mosaic,2
4,4,Various,2


In [12]:
type_counts.columns = ['art_type', 'count']
type_counts.head(3)
type_counts.shape

(16, 2)

#### look at descriptive statistics for the `type_counts`

In [13]:
type_counts.describe()

Unnamed: 0,count
count,16.0
mean,8.25
std,17.043083
min,1.0
25%,1.0
50%,1.0
75%,2.0
max,61.0


#### What are the uniques art types 

In [14]:
type_counts.art_type.unique()

array(['Sculpture', 'Mural', 'Monument', 'Mosaic', 'Various', 'Frieze',
       'Mobile', 'Sculpture/Fountain', 'Bronzes', 'Relief',
       'Stained Glass', 'Street Art', 'mural', 'Multipart', 'Furniture',
       'Fountain'], dtype=object)

### Cleanup time!
- change **mural** to **Mural**
- change **Sculpture/Fountain** to **Fountain**
- change **Bronzes** to **Sculpture**

1. Create a dictionary that maps the current values to the new ones
2. Use the `.replace()` method, passing it the mapping dictionary

In [15]:
art_map_dictionary = {'mural': 'Mural', 'Sculpture/Fountain': 'Fountain', 'Bronzes': 'Sculpture' }

In [16]:
art.art_type = art.art_type.replace(art_map_dictionary)

In [17]:
art.art_type.value_counts()

Sculpture        62
Mural            39
Monument         16
Mosaic            2
Various           2
Frieze            2
Fountain          2
Mobile            2
Relief            1
Stained Glass     1
Street Art        1
Multipart         1
Furniture         1
Name: art_type, dtype: int64

#### Load the iris data from seaborn and look at how the average sepal width, sepal length, petal width, and petal length varies by species
 - which species has the smallest petals?


In [18]:
iris_df = sns.load_dataset('iris')
print(iris_df.shape)
iris_df.head()

(150, 5)


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [19]:
iris_df.groupby('species').mean()

Unnamed: 0_level_0,sepal_length,sepal_width,petal_length,petal_width
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
setosa,5.006,3.428,1.462,0.246
versicolor,5.936,2.77,4.26,1.326
virginica,6.588,2.974,5.552,2.026
