In [None]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series

pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', 10)
pd.set_option('display.max_rows', 10)

In [None]:
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.style.use('ggplot')

### The pandas <u>Series</u> object

#### Series Instantiation

In [None]:
# instantiate a Series using an Numpy array
array1 = np.array([1, 2, 3, 4])
print(array1, '\n')

series1 = Series(array1)
print(series1)

In [None]:
# instantiate a Series using an Python list
list2 = [1, 2, 3, 4]
print(list2, '\n')

series2 = Series(list2)
print(series2)

<b>When you instantiate a Series from a Numpy array of integers, pandas assigns a datatype of int32 for the Series data.</b> <br>
<b>Whereas, instantiating a Series from a Python list of integers, pandas assigns int64 for the Series data.</b>

In [None]:
# instantiate a Series using an Python list and explicitly assign non-integer labels to index
list3 = [1, 2, 3, 4]
print(list3, '\n')

series3 = Series(list3, index=['a', 'b', 'c', 'd'])
print(series3)

In [None]:
# instantiate a Series using an Python list and explicitly assign non-integer labels to index
list4 = [1, 2, 3, 4]
print(list4, '\n')

series4 = Series(list4, index=[0, 1, 2, 3])
print(series4)

#### Selecting data from a Series

In [None]:
# selecting data from a Series when the index is integer labeled (does NOT use zero-based positioning)
print(series1[[1, 3]], '\n')           # returns a Series
print(series1[3])                      # returns an integer

In [None]:
# selecting data from a Series when the index is non-integer labeled (uses label-based positioning)
print(series3[['a', 'c']], '\n')
print(series3['a'], '\n')
print(series3[[0, 2]], '\n')            # note that selection using integers for a non-integer labeled Series implements
                                        # zero-based positioning
    
print(series4, '\n')
print(series4[[1, 3]])

#### The Series index

In [None]:
print(series1.index, '\n')         # when Series are instantiated with integers, the index is a RangeIndex object
print(series2.index, '\n')
print(series3.index, '\n')
print(series4.index, '\n')         # when Series is instantiated with integer index assigned explicitly the index is
                                   # an Int64Index object

#### A time-series Series

In [None]:
dates_ts1 = pd.date_range('2023-06-01', '2023-06-06')       # this is a special index in pandas called a DatetimeIndex
                                                            # that is optimized to index data with dates and times
print(dates_ts1, '\n')
print(dates_ts1[[0, 2]], '\n')
print(dates_ts1[3], '\n')

In [None]:
dates_ts2 = pd.date_range('2023-06-02', '2023-06-05')

series_prices1 = Series([80, 82, 85, 90, 83, 87], index=dates_ts1)
series_prices2 = Series([75, 69, 83, 79], index=dates_ts2)
series_prices3 = Series([70, 75, 69, 83, 79, 77], index=dates_ts1)

print(series_prices1, '\n')
print(series_prices2)

#### Basic statistics on Series data

In [None]:
series_prices1_mean = series_prices1.mean()
series_prices2_mean = series_prices2.mean()

print(series_prices1_mean, '\n')
print(series_prices2_mean, '\n')

In [None]:
series_prices_diff = series_prices1 - series_prices2        # understand by the first and last row return NaN

print(series_prices_diff)

### The pandas <u>DataFrame</u> object

#### A pandas DataFrame is a collection of one or more pandas Series aligned by a common index.

#### DataFrame Instantiation

In [None]:
df1 = DataFrame({'IBM':series_prices1, 'MSFT':series_prices3})
df2 = DataFrame({'IBM':series_prices1, 'TKWY_NA':series_prices2})

print(df1, '\n')
print(df2)

#### Selecting data from a DataFrame

In [None]:
print(df1['IBM'], '\n')                          # this returns a pandas Series
print(df1.IBM, '\n')
print(df1[['MSFT', 'IBM']], '\n')                # this retuns a pandas DataFrame

In [None]:
df1['IBM'][[1, 4]]                 # here you are selecting from a Series extracted from the general DataFrame

In [None]:
df1.IBM - df1.MSFT                  # this is identical to math using two separate Series

In [None]:
df1['Difference'] = df1.IBM - df1.MSFT

print(df1)

In [None]:
df1.index

In [None]:
df1.columns

In [3]:
[1:4]

SyntaxError: invalid syntax (1830641140.py, line 1)