# Pandas Notes 1

**TODO:** add a section on label-based (`loc`) and position-based (`iloc`) indexing.

In [1]:
import numpy as np
import pandas as pd

## Pandas DataTypes
### Series
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.html

A Series is like an associative 1D array:
![series](img/pandas_series.jpg)

In [2]:
# Plain Series: without an explicit index the labels are the integers 0..n-1
series_numbers = pd.Series(range(1, 6))
print(series_numbers)
print('\n')
# Labelled ("associative") Series: the dict keys become the index and the
# Series itself carries a name (see img)
ingredients = pd.Series(
    {'Flour': '4 cups', 'Milk': '1 cup', 'Eggs': '2 large', 'Spam': '1 can'},
    name='Dinner',
)
print(ingredients)

0    1
1    2
2    3
3    4
4    5
dtype: int64


Flour     4 cups
Milk       1 cup
Eggs     2 large
Spam       1 can
Name: Dinner, dtype: object


### dataFrame
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html

A DataFrame is like an associative 2D array:
![df](img/pandas_dataframe.jpg)
Put differently, a DataFrame consists of one or more Series that share the same index.

In [3]:
# Each dict key becomes a column; the shared index labels the rows
fruit_sales = pd.DataFrame(
    data={'Apples': [35, 41], 'Bananas': [21, 34]},
    index=['2017 Sales', '2018 Sales'],
)
print(fruit_sales)

            Apples  Bananas
2017 Sales      35       21
2018 Sales      41       34


### Panel
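A Panel is like an associative 3D array. You can think of it as a cube of elements. (Note: `Panel` was deprecated in pandas 0.20 and removed in pandas 0.25; for 3D data use a DataFrame with a `MultiIndex` instead.)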
![panel1](img/pandas_panel1.jpg)
Another way to look at it is as an array of DataFrames, just as a DataFrame can be seen as an array of rows, where every row is a Series.
![panel2](img/pandas_panel2.jpg)

## Manipulating data
Rows/Data: append, delete, update<br>
Columns: append, delete, update<br>
Rename: index, column, Change index

### Series

In [4]:
# Rows / Data: append, delete and update values of a Series
series_numbers = pd.Series([1, 2, 3, 4, 5])
second_series = pd.Series([6, 7, 8])

# Series.append() was removed in pandas 2.0; pd.concat() is the replacement.
series_numbers = pd.concat([series_numbers, second_series])  # indices will be kept
series_numbers = pd.concat([series_numbers, second_series], ignore_index=True)  # indices will be ignored

# drop() returns a new Series and leaves the original untouched, so the result
# has to be captured; series_numbers itself still holds all 11 values afterwards.
series_without_5_and_6 = series_numbers.drop(series_numbers.index[[5, 6]])

# Strings cannot be stored in an int64 Series. Cast to object explicitly instead
# of relying on an implicit upcast, which newer pandas versions deprecate.
series_numbers = series_numbers.astype(object)
series_numbers[2] = 555
series_numbers.update(pd.Series(['Hugo', 'Horst'], index=[1, 4]))  # values are matched by index label
print(series_numbers)

ingredients = pd.Series(['4 cups', '1 cup', '2 large', '1 can'], index=['Flour', 'Milk', 'Eggs', 'Spam'], name='Dinner')
# np.NaN was removed in NumPy 2.0; np.nan is the canonical spelling.
more_ingredients = pd.Series(['1 cup', '1 large', np.nan], index=['Soy milk', 'Banana', 'Berries'])
ingredients = pd.concat([ingredients, more_ingredients])  # indices will be kept; ignore_index=True would renumber them
ingredients = ingredients.drop(labels=['Milk', 'Eggs', 'Spam'])  # these ingredients are not vegan ;-)
ingredients.update(pd.Series({'Berries': 'a few'}))  # overwrites the missing 'Berries' value

print(ingredients)

0         1
1      Hugo
2       555
3         4
4     Horst
5         6
6         7
7         8
8         6
9         7
10        8
dtype: object
Flour        4 cups
Soy milk      1 cup
Banana      1 large
Berries       a few
dtype: object


### dataFrames
#### Append

In [5]:
fruit_sales = pd.DataFrame({'Apples': [35, 41], 'Bananas': [21, 34]}, index=['2017 Sales', '2018 Sales'])
other_fruit_sales = pd.DataFrame({'Apples': [8], 'Bananas': [10]}, index=['2019 Sales'])
# DataFrame.append() was removed in pandas 2.0; pd.concat() stacks the rows of
# both frames and keeps their index labels.
fruit_sales = pd.concat([fruit_sales, other_fruit_sales])
print(fruit_sales)

            Apples  Bananas
2017 Sales      35       21
2018 Sales      41       34
2019 Sales       8       10


#### Drop rows and columns

In [6]:
# drop() returns a new frame each time, so the result is reassigned
fruit_sales = fruit_sales.drop(index=['2018 Sales'])  # remove rows by index label (axis 0)
fruit_sales = fruit_sales.drop(columns=['Bananas'])  # remove columns by name (axis 1)
print(fruit_sales)

            Apples
2017 Sales      35
2019 Sales       8


#### Update Columns

In [7]:
# update() aligns on index and column labels and modifies fruit_sales in place.
# The '2018 Sales' row is not present in fruit_sales, so its value (99) is ignored.
update_fruits = pd.DataFrame(
    [88, 99],
    index=['2017 Sales', '2018 Sales'],
    columns=['Apples'],
)
fruit_sales.update(update_fruits)
print(fruit_sales)

            Apples
2017 Sales    88.0
2019 Sales     8.0


#### Insert Columns

In [8]:
# Start from a fresh frame, then add a new column at position 1 (between
# 'Apples' and 'Bananas'); insert() modifies the frame in place
fruit_sales = pd.DataFrame(
    {'Apples': [35, 41], 'Bananas': [21, 34]},
    index=['2017 Sales', '2018 Sales'],
)
fruit_sales.insert(
    loc=1,
    column="SellId",
    value=['Sell01', 'Sell02'],
    allow_duplicates=False,  # refuse to add a column name that already exists
)
print(fruit_sales)

            Apples  SellId  Bananas
2017 Sales      35  Sell01       21
2018 Sales      41  Sell02       34


In [9]:
# Remove a column by name; drop() hands back a new frame, so reassign it
fruit_sales = fruit_sales.drop(columns='Bananas')
print(fruit_sales)

            Apples  SellId
2017 Sales      35  Sell01
2018 Sales      41  Sell02


In [10]:
# Give a column a new name via an {old_name: new_name} mapping
fruit_sales = fruit_sales.rename({'Apples': 'Cauliflowers'}, axis='columns')
print(fruit_sales)

            Cauliflowers  SellId
2017 Sales            35  Sell01
2018 Sales            41  Sell02


In [11]:
# Replace exactly one row label: copy the labels into a list, swap the
# entry in question, then assign the list back as the new index
as_list = list(fruit_sales.index)
idx = as_list.index('2018 Sales')  # position of the label to replace
as_list[idx] = '2020 Sales'
fruit_sales.index = as_list
print(fruit_sales)

            Cauliflowers  SellId
2017 Sales            35  Sell01
2020 Sales            41  Sell02


In [12]:
# Promote the 'SellId' column to be the row index; the index is printed
# before and after to show the difference
print(fruit_sales.index)
fruit_sales = fruit_sales.set_index(keys='SellId')  # returns a new frame (inplace=True would modify it directly)
print(fruit_sales)
print(fruit_sales.index)

Index(['2017 Sales', '2020 Sales'], dtype='object')
        Cauliflowers
SellId              
Sell01            35
Sell02            41
Index(['Sell01', 'Sell02'], dtype='object', name='SellId')
