In [14]:
import pandas as pd
import numpy as np

In [15]:
# Draw four samples from the standard normal distribution into a NumPy array
arr = np.random.standard_normal(size=4)
print(type(arr))  # -> <class 'numpy.ndarray'>
print(arr, "\n")

# Wrap that same array in a pandas Series; a default integer index (0, 1, 2, ...) is attached
s = pd.Series(data=arr)
print(type(s))  # -> <class 'pandas.core.series.Series'>
print(s)

<class 'numpy.ndarray'>
[-0.88128131 -1.09233235 -1.23627197 -1.25552967] 

<class 'pandas.core.series.Series'>
0   -0.881281
1   -1.092332
2   -1.236272
3   -1.255530
dtype: float64


### The basic method to create a pandas.Series is to call
`s = pd.Series(data, index=index)`

In [16]:
# Build a Series from a NumPy integer range, using years as the index labels
pd.Series(
    data=np.arange(3),
    index=[2023, 2024, 2025],
)

2023    0
2024    1
2025    2
dtype: int64

In [17]:
# Build a pandas Series from a plain Python list of strings.
# No index is passed, so the default integer index (0, 1, 2, ...) is used.
pd.Series(data=['EDS 220', 'EDS 222', 'EDS 223', 'EDS 242'])

0    EDS 220
1    EDS 222
2    EDS 223
3    EDS 242
dtype: object

In [18]:
# Build a pandas Series from a dictionary: keys become index labels, values become the data

# The dictionary mixes integers with a string, so the resulting Series has dtype object
d = dict(key_0=2, key_1='3', key_2=5)

# Pass the dictionary as the data argument
pd.Series(data=d)

key_0    2
key_1    3
key_2    5
dtype: object

In [19]:
# Build a pandas Series from a single value: the scalar is repeated for every index label
pd.Series(data=3.0, index=list('ABC'))

A    3.0
B    3.0
C    3.0
dtype: float64

In [20]:
# Integer values labelled by name
s = pd.Series(data=[98, 73, 65], index=['Andrea', 'Beth', 'Carolina'])

# Element-wise division by 10; the result is a new float Series
print(s.div(10), '\n')

# Element-wise exponential via a NumPy function applied to the whole Series
print(np.exp(s), '\n')

# Neither operation above was assigned back, so s still holds the original integers
print(s)

Andrea      9.8
Beth        7.3
Carolina    6.5
dtype: float64 

Andrea      3.637971e+42
Beth        5.052394e+31
Carolina    1.694889e+28
dtype: float64 

Andrea      98
Beth        73
Carolina    65
dtype: int64


In [21]:
# Element-wise comparison: returns a new Boolean Series that is True
# wherever the corresponding element of s is greater than 70

s.gt(70)

Andrea       True
Beth         True
Carolina    False
dtype: bool

In [22]:
# Series with missing values (np.nan) at positions 2 and 4; it is stored as float64

s = pd.Series(data=[1, 2, np.nan, 4, np.nan])
s

0    1.0
1    2.0
2    NaN
3    4.0
4    NaN
dtype: float64

In [23]:
# Check if series has NAs: the `hasnans` attribute is a single Boolean
# that is True when at least one element of s is missing

s.hasnans

True

In [24]:
# Flag the missing elements: returns a Boolean Series with the same index as s,
# True where the element is NA and False otherwise

pd.isna(s)

0    False
1    False
2     True
3    False
4     True
dtype: bool

## Check in 1

In [25]:
# Create and print a Series with a lettered index in which missing values
# are encoded with the sentinel value -999
NA_SENTINEL = -999
myseries = pd.Series([0.25, NA_SENTINEL, 0.75, NA_SENTINEL], index=['A', 'B', 'C', 'D'])
print(myseries)

# Replace every sentinel entry with NaN: mask() swaps out the values where the condition is True
myseries = myseries.mask(myseries == NA_SENTINEL)

# Verify the replacement: only 'B' and 'D' should now be missing
assert myseries.isna().tolist() == [False, True, False, True]

A      0.25
B   -999.00
C      0.75
D   -999.00
dtype: float64


# Creating a pandas dataframe

In [26]:
# Column data keyed by column name; each value is a Series holding that column's entries
d = dict(
    col_name_1=pd.Series(np.arange(3)),
    col_name_2=pd.Series([3.1, 3.2, 3.3]),
)

# Assemble the data frame from the dictionary of columns
df = pd.DataFrame(data=d)
df

Unnamed: 0,col_name_1,col_name_2
0,0,3.1
1,1,3.2
2,2,3.3


In [27]:
# Change index attribute in the dataframe: assigning a list of labels to df.index
# replaces the row labels on df itself, so cells that run afterwards see 'a', 'b', 'c'
df.index = ['a','b','c']
df

Unnamed: 0,col_name_1,col_name_2
a,0,3.1
b,1,3.2
c,2,3.3


## Check in 2

In [28]:
# Show the current column names
print(df.columns)

# Map each old column name to its new name and keep the result under a new variable
renamed_df = df.rename(
    columns={
        'col_name_1': 'C1',
        'col_name_2': 'C2',
    }
)
print(renamed_df)

Index(['col_name_1', 'col_name_2'], dtype='object')
   C1   C2
a   0  3.1
b   1  3.2
c   2  3.3


### Summary:

- A pandas Series is a one-dimensional array whose elements are labeled by an index, while a pandas DataFrame is a two-dimensional table (similar to a spreadsheet) that can be thought of as a dictionary of pd.Series, one per column.
- Series
    - We can initialize a pandas Series with the default integer index or by assigning a list of values/characters as the index.
    - We can also initialize a pandas Series from a dictionary, where the keys become the index labels and the values become the data (either characters or numeric values).
    - We can perform arithmetic operations on a Series without affecting the original Series.
    - We can create a Boolean pandas Series by applying a comparison (e.g. `s > 70`) to check whether each value fulfills a specified condition.
    - We can check if a Series has NA values (`s.hasnans`) and see which elements specifically are NAs (`s.isna()`).
- Dataframe
    - We can create a dataframe from a dictionary. 
    - We can change the index attribute of the dataframe. 
    - We can rename the columns with `df.rename()`, passing its `columns` parameter a dictionary that maps each old name to its new name.