# Installing and Running pandas

In [1]:
import pandas as pd
import numpy as np

In [2]:
pd.__version__

'2.2.3'

# Introduction to pandas Series

![image.png](attachment:image.png)

 so we can create a series from a list

In [3]:
products = ['A', 'B', 'C', 'D']
products

['A', 'B', 'C', 'D']

In [4]:
type(products)

list

In [5]:
product_categories = pd.Series(products)

In [6]:
product_categories         #indexing from 0

0    A
1    B
2    C
3    D
dtype: object

![image.png](attachment:image.png)

In [7]:
type(product_categories)

pandas.core.series.Series

In [8]:
type(pd.Series(products))

pandas.core.series.Series

In [9]:
daily_rates_dollars = pd.Series([40, 45, 50, 60])
daily_rates_dollars

0    40
1    45
2    50
3    60
dtype: int64

In [10]:
type(daily_rates_dollars)

pandas.core.series.Series

![image.png](attachment:image.png)

In [11]:
print(daily_rates_dollars)

0    40
1    45
2    50
3    60
dtype: int64


In [12]:
array_a = np.array([10, 20, 30, 40, 50])
array_a

array([10, 20, 30, 40, 50])

In [13]:
type(array_a)

numpy.ndarray

In [14]:
series_a = pd.Series(array_a)
series_a

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [15]:
type(series_a)

pandas.core.series.Series

![image.png](attachment:image.png)

![image.png](attachment:image.png)

![image.png](attachment:image.png)

# Working with Attributes in Python

![image.png](attachment:image.png)

In [16]:
series_a = pd.Series([10, 20, 30, 40, 50])
series_a

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [17]:
series_a.dtype

dtype('int64')

In [18]:
series_a.size

5

In [19]:
product_categories = pd.Series(['A', 'B', 'C', 'D'])
product_categories

0    A
1    B
2    C
3    D
dtype: object

In [20]:
product_categories.dtype

dtype('O')

In [21]:
product_categories.size

4

In [22]:
type(product_categories.size)

int

In [23]:
product_categories.name

![image.png](attachment:image.png)

In [24]:
print(product_categories.name)

None


In [25]:
product_categories.name = "Product Categories"
product_categories

0    A
1    B
2    C
3    D
Name: Product Categories, dtype: object

In [26]:
product_categories.name

'Product Categories'

In [27]:
print(product_categories.name)

Product Categories


![image.png](attachment:image.png)

# Using an Index in pandas
here we will be focusing on pandas series object 
![image.png](attachment:image.png)

In [28]:
prices_per_category = {'Product A': 22250, 'Product B' : 16600, 'Product C':15600}
prices_per_category

{'Product A': 22250, 'Product B': 16600, 'Product C': 15600}

In [29]:
type(prices_per_category)

dict

In [30]:
prices_per_category = pd.Series(prices_per_category)
prices_per_category

Product A    22250
Product B    16600
Product C    15600
dtype: int64

![image.png](attachment:image.png)

![image.png](attachment:image.png)
![image-2.png](attachment:image-2.png)

In [31]:
type(prices_per_category)

pandas.core.series.Series

In [32]:
prices_per_category.index

Index(['Product A', 'Product B', 'Product C'], dtype='object')

In [33]:
type(prices_per_category.index)

pandas.core.indexes.base.Index

Label-based vs Position-based Indexing

In [34]:
series_a = pd.Series([10, 20, 30, 40, 50])
series_a

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [35]:
series_a.index

RangeIndex(start=0, stop=5, step=1)

In [36]:
type(series_a.index)

pandas.core.indexes.range.RangeIndex

In [37]:
list(series_a.index)

[0, 1, 2, 3, 4]

![image.png](attachment:image.png)

In [38]:
#example
series_a = pd.Series([10, 20, 30], index=['a', 'b', 'c'])
print(list(series_a.index))  

['a', 'b', 'c']


In [39]:
prices_per_category = pd.Series({'Product A': 22250, 'Product B' : 16600, 'Product C':12500})
prices_per_category

Product A    22250
Product B    16600
Product C    12500
dtype: int64

In [40]:
prices_per_category.index

Index(['Product A', 'Product B', 'Product C'], dtype='object')

In [41]:
type(prices_per_category.index)

pandas.core.indexes.base.Index

# More on Working with Indices in Python

In [42]:
series_a = pd.Series([10, 20, 30, 40, 50])
prices_per_category = pd.Series({'Product A': 22250, 'Product B' : 16600, 'Product C':12500})

In [43]:
series_a

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [44]:
series_a[0]

np.int64(10)

In [45]:
prices_per_category

Product A    22250
Product B    16600
Product C    12500
dtype: int64

In [46]:
prices_per_category['Product A']

np.int64(22250)

In [47]:
# prices_per_category[0]
# prices_per_category[1]
prices_per_category[2]

  prices_per_category[2]


np.int64(12500)

In [48]:
series_b = pd.Series([10, 20, 30, 40, 50], index = [1, 2, 3, 4, 5])
series_b

1    10
2    20
3    30
4    40
5    50
dtype: int64

In [50]:
#series_b[0]
series_b[1]

np.int64(10)

# Using Methods in Python - Part I

In [51]:
start_date_deposits = pd.Series({
    '7/4/2014'   : 2000,
    '1/2/2015'   : 2000,
    '12/8/2012'  : 1000,
    '2/20/2015'  : 2000,
    '10/28/2013' : 2000,
    '4/19/2015'  : 2000,
    '7/4/2016'   : 2000,
    '4/24/2014'  : 2000,
    '9/3/2015'   : 4000,
    '7/25/2016'  : 2000,
    '5/1/2014'   : 2000,
    '3/29/2013'  : 2000,
    '10/3/2014'  : 2000,
    '9/18/2015'  : 2500
})

In [52]:
start_date_deposits

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
4/19/2015     2000
7/4/2016      2000
4/24/2014     2000
9/3/2015      4000
7/25/2016     2000
5/1/2014      2000
3/29/2013     2000
10/3/2014     2000
9/18/2015     2500
dtype: int64

In [54]:
start_date_deposits.sum()

np.int64(29500)

In [55]:
start_date_deposits.min()

np.int64(1000)

In [56]:
start_date_deposits.max()

np.int64(4000)

In [57]:
start_date_deposits.idxmax()

'9/3/2015'

In [58]:
start_date_deposits.idxmin()

'12/8/2012'

In [59]:
start_date_deposits.head()

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
dtype: int64

In [60]:
start_date_deposits.tail()

7/25/2016    2000
5/1/2014     2000
3/29/2013    2000
10/3/2014    2000
9/18/2015    2500
dtype: int64

# Introduction to pandas DataFrames

#1: Construct a DataFrame from a dictionary of lists

In [62]:
data = {'ProductName':['Product A', 'Product B', 'Product C'], 'ProductPrice':[22250, 16600, 12500]}
df = pd.DataFrame(data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


#2: Construct a DataFrame from a dictionary of lists + specify an index

In [63]:
data = {'ProductName':['Product A', 'Product B', 'Product C'], 'ProductPrice':[22250, 16600, 12500]}
df = pd.DataFrame(data, index = ['A', 'B', 'C'])
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,12500


#3: Construct a DataFrame from a list of dictionaries

In [64]:
data = [{'ProductName':'Product A', 'ProductPrice':22250}, 
        {'ProductName':'Product B', 'ProductPrice':16600}, 
        {'ProductName':'Product C', 'ProductPrice':12500}]
df = pd.DataFrame(data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [65]:
data = [{'ProductName':'Product A', 'ProductPrice':22250}, 
        {'ProductName':'Product B', 'ProductPrice':16600}, 
        {'ProductName':'Product C', 'ProductPrice':12500},
        {'ProductName':'Product D'}]
df = pd.DataFrame(data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250.0
1,Product B,16600.0
2,Product C,12500.0
3,Product D,


#4: Construct a DataFrame from a dictionary of pandas Series

In [66]:
ser_products = pd.Series(['Product A', 'Product B', 'Product C'])
ser_prices = pd.Series([22250, 16600, 12500])

In [67]:
data = {'ProductName':ser_products, 'ProductPrice':ser_prices}
df = pd.DataFrame(data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [68]:
ser_products = pd.Series(['Product A', 'Product B', 'Product C'], index = ['A', 'B', 'C'])
ser_prices = pd.Series([22250, 16600, 12500], index = ['A', 'B', 'C'])

data = {'ProductName':ser_products, 'ProductPrice':ser_prices}
df = pd.DataFrame(data)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,12500


In [69]:
ser_products = pd.Series(['Product A', 'Product B', 'Product C'], index = ['A', 'B', 'C'])
ser_prices = pd.Series([22250, 16600, 12500], index = ['C', 'B', 'A'])

data = {'ProductName':ser_products, 'ProductPrice':ser_prices}
df = pd.DataFrame(data)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,12500
B,Product B,16600
C,Product C,22250


#5: Construct a DataFrame from a list of lists

In [70]:
df = pd.DataFrame(data = [['Product A', 22250], ['Product B', 16600], ['Product C', 12500]], 
                  columns = ['ProductName', 'ProductPrice'],
                  index = ['A', 'B', 'C'])
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,12500


In [71]:
df.shape

(3, 2)