# Introducing Pandas

Pandas is an open source library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language.

In [39]:
import numpy as np
np.random.seed(0)
import pandas as pd

In [40]:
def series_info(series: pd.Series) -> None:
    """Print a short structural summary of a Series.

    Writes the number of dimensions, the shape, the element count and the
    dtype (one per line), followed by the Series itself and a blank line.

    Args:
        series: The Series to describe.
    """
    # Each entry is printed on its own line, in this order.
    summary = (
        f"ndim: {series.ndim}",
        f"shape: {series.shape}",
        f"size: {series.size}",
        f"dtype: {series.dtype}",
        f"values:\n{series}\n",
    )
    for entry in summary:
        print(entry)

In [41]:
def df_info(df: pd.DataFrame) -> None:
    """Print a short structural summary of a DataFrame.

    Writes the number of dimensions, the shape, the total cell count and the
    per-column dtypes, followed by the DataFrame itself and a blank line.

    Args:
        df: The DataFrame to describe.
    """
    # One print call; sep="\n" puts every field on its own line.
    print(
        f"ndim: {df.ndim}",
        f"shape: {df.shape}",
        f"size: {df.size}",
        f"dtype: {df.dtypes}",
        f"values:\n{df}\n",
        sep="\n",
    )

In [42]:
# Build a float32 Series from a plain list. No index is passed, so the
# values get the default integer labels 0..3.
data = pd.Series(data=[0.25, 0.5, 0.75, 1.0], dtype=np.float32)

series_info(data)

ndim: 1
shape: (4,)
size: 4
dtype: float32
values:
0    0.25
1    0.50
2    0.75
3    1.00
dtype: float32



In [43]:
# .values exposes the Series data as a NumPy ndarray.
print(type(data.values))
print(data.values)

<class 'numpy.ndarray'>
[0.25 0.5  0.75 1.  ]


In [44]:
# .index holds the axis labels; with no explicit index it is a RangeIndex.
print(type(data.index))
print(data.index)

<class 'pandas.core.indexes.range.RangeIndex'>
RangeIndex(start=0, stop=4, step=1)


In [45]:
# Select the element labelled 1 (with the default index, label == position).
print(data[1])

0.5


In [46]:
# Integer slice: picks rows 1 and 2 (the stop value 3 is excluded).
# The result keeps the original labels 1 and 2 rather than restarting at 0.
slice_data = data[1:3]

series_info(slice_data)

ndim: 1
shape: (2,)
size: 2
dtype: float32
values:
1    0.50
2    0.75
dtype: float32



In [48]:
# The integer key is matched against the index labels, not the position:
# label 1 holds 0.5, whereas the element at position 1 of the slice is 0.75.
slice_data[1]

0.5

## pd.Series

Series is a one-dimensional labeled array capable of holding any data type (integers, strings, floating point numbers, Python objects, etc.).  
The axis labels are collectively referred to as the index.

The basic method to create a Series is to call:

```python
s = pd.Series(data, index=index)
```

Here, `data` can be many different things:

- an iterable object (list, np.ndarray)
- a Python dict
- a scalar value (like 5)

In [29]:
# Same values as before, but with explicit string labels as the index.
# No dtype is given here, so pandas infers it from the values.
data = pd.Series(
    [0.25, 0.5, 0.75, 1.0],
    index=['a', 'b', 'c', 'd']
)

series_info(data)

ndim: 1
shape: (4,)
size: 4
dtype: float64
values:
a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64



In [30]:
# Look up a single value by its string label.
print(data['a'])

0.25


In [31]:
# Building a Series from a dict: the keys become the index labels and the
# values become the data.
colors_dict = {
    'Red': 12,
    'Green': 8,
    'Blue': 33,
}
# dtype=np.int8 stores the counts as 8-bit integers.
colors_s = pd.Series(
    colors_dict,
    dtype=np.int8
)

series_info(colors_s)

ndim: 1
shape: (3,)
size: 3
dtype: int8
values:
Red      12
Green     8
Blue     33
dtype: int8



In [32]:
# Dict keys act as labels, so values are retrieved by key.
print(colors_s['Green'])

8


In [33]:
# Neither index nor dtype is given: labels default to 0..2 and the dtype is
# inferred from the integer values.
series_info(pd.Series([2, 4, 6]))

ndim: 1
shape: (3,)
size: 3
dtype: int64
values:
0    2
1    4
2    6
dtype: int64



## pd.DataFrame

DataFrame is a 2-dimensional labeled data structure with columns of potentially different types. You can think of it like a spreadsheet or SQL table, or a dict of Series objects. It is generally the most commonly used pandas object.

Like Series, DataFrame accepts many different kinds of input:

- Dict of 1D ndarrays, lists, dicts, or Series
- 2-D numpy.ndarray
- Structured or record ndarray
- A Series
- Another DataFrame

In [34]:
# Hex colour codes keyed by the same colour names used for colors_s.
codes_dict = {
    'Red': '#FF0000',
    'Green': '#00FF00',
    'Blue': '#0000FF',
}
# dtype="string" requests pandas' string dtype for the values.
codes_s = pd.Series(codes_dict, dtype="string")

series_info(codes_s)

ndim: 1
shape: (3,)
size: 3
dtype: string
values:
Red      #FF0000
Green    #00FF00
Blue     #0000FF
dtype: string



In [35]:
# Dict of Series -> DataFrame: each dict key becomes a column name, and the
# Series' shared labels (Red, Green, Blue) become the row index.
combined_df = pd.DataFrame({
    'count': colors_s,
    'code': codes_s
})

df_info(combined_df)

ndim: 2
shape: (3, 2)
size: 6
dtype: count      int8
code     string
dtype: object
values:
       count     code
Red       12  #FF0000
Green      8  #00FF00
Blue      33  #0000FF



In [36]:
# Row labels of the DataFrame.
print(combined_df.index)

Index(['Red', 'Green', 'Blue'], dtype='object')


In [37]:
# Column labels of the DataFrame.
print(combined_df.columns)

Index(['count', 'code'], dtype='object')


In [38]:
# Selecting a single column by name returns a Series; its name is the
# column label and it keeps the column's dtype.
print(type(combined_df["count"]))

series_info(combined_df["count"])

<class 'pandas.core.series.Series'>
ndim: 1
shape: (3,)
size: 3
dtype: int8
values:
Red      12
Green     8
Blue     33
Name: count, dtype: int8

