# Configuring pandas

### Documentation - https://pandas.pydata.org/docs/reference/index.html

In [212]:
# import numpy and pandas
import numpy as np
import pandas as pd

# used for dates
import datetime
from datetime import datetime, date

# Set some pandas options controlling output format
pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', 8)
pd.set_option('display.max_rows', 10)
pd.set_option('display.width', 80)
    
# bring in matplotlib for graphics
import matplotlib.pyplot as plt
%matplotlib inline

# The pandas Series
#### A Pandas Series is like a column in a table. It is a one-dimensional array holding data of any type. Read the documentation and create a series that contains 4 items. After that finish the following given cells.

In [213]:
# create a four item Series
new1 = pd.Series([1, 2, 3, 4])
new1

0    1
1    2
2    3
3    4
dtype: int64

In [214]:
# get value at label 1
new1[1]

2

In [215]:
# Write a snippet of code that will return a Series with the row with labels 1 and 3
new1[[1,3]]

1    2
3    4
dtype: int64

In [216]:
# create the same series but this time using an explicit index
new2 = new1 = pd.Series([1, 2, 3, 4])


In [217]:
# look up items the series having index at first and index at last position
new2[0:3]
new2

0    1
1    2
2    3
3    4
dtype: int64

In [218]:
# get only the index of the Series
new2.index

RangeIndex(start=0, stop=4, step=1)

In [219]:
# create a Series who's index is a series of dates
# https://pandas.pydata.org/docs/reference/api/pandas.date_range.html
# between the two specified dates (inclusive)
# Dates: September 5, 2022 to September 11, 2022
new3 = pd.date_range('09/05/2022', '09/12/2022')
new3

DatetimeIndex(['2022-09-05', '2022-09-06', '2022-09-07', '2022-09-08',
               '2022-09-09', '2022-09-10', '2022-09-11', '2022-09-12'],
              dtype='datetime64[ns]', freq='D')

In [220]:
# create a Series with values (representing temperatures) for each date in the index
# You can give them hardcoded values for now [80, 82, 85, 90, 83, 87, 80, 78]
# You can call this series clt_temp
clt_temp =pd.Series([80, 82, 85, 90, 83, 87, 80, 100], index = new3)
clt_temp

2022-09-05     80
2022-09-06     82
2022-09-07     85
2022-09-08     90
2022-09-09     83
2022-09-10     87
2022-09-11     80
2022-09-12    100
Freq: D, dtype: int64

In [221]:
# what's the temperation for September 9?
clt_temp[4]

83

In [222]:
# create a second series of values using the same index
# You can give them hardcoded values for now [70, 75, 69, 83, 79, 77, 74, 79]
# You can call this series nycTemp
nyc_Temp = pd.Series([70, 75, 69, 83, 79, 77, 74, 100], index = new3)
nyc_Temp

2022-09-05     70
2022-09-06     75
2022-09-07     69
2022-09-08     83
2022-09-09     79
2022-09-10     77
2022-09-11     74
2022-09-12    100
Freq: D, dtype: int64

In [223]:
# the series clt_temp and nyc_temp are aligned by their index values
# calculates the difference by those matching labels
difference = clt_temp - nyc_Temp
difference

2022-09-05    10
2022-09-06     7
2022-09-07    16
2022-09-08     7
2022-09-09     4
2022-09-10    10
2022-09-11     6
2022-09-12     0
Freq: D, dtype: int64

In [224]:
# Write the code to find the temperature difference on September 8?
difference[3]

7

In [225]:
# Write code to find an average difference of temperature between the 2 cities?
difference.mean()

7.5

# The pandas DataFrame

In [226]:
# create a DataFrame from the two series objects clt_temp and nyc_temp
# and give them column names
temps_df = pd.DataFrame(
             {'Charlotte': clt_temp, 
              'NYC': nyc_Temp})
temps_df

            Charlotte  NYC
2022-09-05         80   70
2022-09-06         82   75
2022-09-07         85   69
2022-09-08         90   83
2022-09-09         83   79
2022-09-10         87   77
2022-09-11         80   74
2022-09-12        100  100

In [227]:
# get the column with the name Charlotte
temps_df['Charlotte']

2022-09-05     80
2022-09-06     82
2022-09-07     85
2022-09-08     90
2022-09-09     83
2022-09-10     87
2022-09-11     80
2022-09-12    100
Freq: D, Name: Charlotte, dtype: int64

In [228]:
# likewise we can get just the NYC column
temps_df['NYC']

2022-09-05     70
2022-09-06     75
2022-09-07     69
2022-09-08     83
2022-09-09     79
2022-09-10     77
2022-09-11     74
2022-09-12    100
Freq: D, Name: NYC, dtype: int64

In [229]:
# return both columns in a different order

In [230]:
# retrieve the Charlotte column through PROPERTY SYNTAX
temps_df

            Charlotte  NYC
2022-09-05         80   70
2022-09-06         82   75
2022-09-07         85   69
2022-09-08         90   83
2022-09-09         83   79
2022-09-10         87   77
2022-09-11         80   74
2022-09-12        100  100

In [231]:
# calculate the temperature difference between the two cities using the dataframe
temps_df.Charlotte - temps_df.NYC 

2022-09-05    10
2022-09-06     7
2022-09-07    16
2022-09-08     7
2022-09-09     4
2022-09-10    10
2022-09-11     6
2022-09-12     0
Freq: D, dtype: int64

In [232]:
# add a column to temp_df which contains the difference in temps you can call the column Difference
temps_df['difference'] = temps_df['Charlotte'] - temps_df.NYC 
temps_df

            Charlotte  NYC  difference
2022-09-05         80   70          10
2022-09-06         82   75           7
2022-09-07         85   69          16
2022-09-08         90   83           7
2022-09-09         83   79           4
2022-09-10         87   77          10
2022-09-11         80   74           6
2022-09-12        100  100           0

In [233]:
# get the columns of the dataframe, which is also an Index object
temps_df.columns

Index(['Charlotte', 'NYC', 'difference'], dtype='object')

In [234]:
# slice the temp differences column for the rows at 
# location 1 through 4 (as though it is an array)
temps_df['difference'][1:5]

2022-09-06     7
2022-09-07    16
2022-09-08     7
2022-09-09     4
Freq: D, Name: difference, dtype: int64

In [235]:
# get the row at array position 1
temps_df.iloc[1] # you pass in a number where as loc is a specific set number

Charlotte     82
NYC           75
difference     7
Name: 2022-09-06 00:00:00, dtype: int64

In [236]:
# the names of the columns have become the index
# they have been 'pivoted'
# retrieve a random row of your choice by index label using .loc
temps_df.loc[['2022-09-07']]

            Charlotte  NYC  difference
2022-09-07         85   69          16

In [237]:
# get the values in the Differences column in tows 1, 3 and 5
# using 0-based location
temps_df.iloc[[1,3,5]]['difference']

2022-09-06     7
2022-09-08     7
2022-09-10    10
Freq: 2D, Name: difference, dtype: int64

In [238]:
# which values in the Missoula column are > 82?
temps_df.Charlotte > 82

2022-09-05    False
2022-09-06    False
2022-09-07     True
2022-09-08     True
2022-09-09     True
2022-09-10     True
2022-09-11    False
2022-09-12     True
Freq: D, Name: Charlotte, dtype: bool

In [239]:
# return the rows where the temps for Missoula > 82
temps_df[temps_df.Charlotte > 82]
temps_df.describe().loc['std']

Charlotte     6.664136
NYC           9.855202
difference    4.720775
Name: std, dtype: float64

# Loading data from a CSV file into a DataFrame

In [240]:
# read the contents of the file activity3_0.csv into a DataFrame. Call the dataframe df

In [241]:
# Print the contents of the date column

In [242]:
# Get the first value in the date column

In [243]:
# Write the code to get the type of the Date

In [244]:
# read the data and tell pandas the date column should be a date in the resulting DataFrame
# https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html

In [245]:
# verify the type now is date
# in pandas, this is actually a Timestamp

In [246]:
# unfortunately the index is numeric which makes accessing data by date more complicated
# read in again, now specity the data column as being the index of the resulting DataFrame

In [247]:
# Verify that the index is now a DatetimeIndex by calling df.index

# Visualization

In [248]:
# plots the values in the Close column
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.html

# Thats it for today :D Good job!!!!

#### Submit this notebook on canvas and you will be graded for correctness. Make sure that you are not cutting corners or shortcuts 