# Pandas Introduction

In [None]:
import pandas as pd

In [None]:
# Build a Series from a plain Python list; pandas assigns a default integer index.
ser1 = pd.Series(data=[10, 20, 30, 40, 50])

In [None]:
# Display the Series: the numbers on the left are the index labels,
# the numbers on the right are the stored values.
ser1

In [None]:
# A Series can hold arbitrary Python objects: lists of different lengths,
# lists with mixed element types, and even a dict.
ser2 = pd.Series(
    data=[
        [10, 20],
        [30, 40.5, 'series'],
        [50, 55],
        dict(Name='Tess', Org='Packt'),
    ]
)

ser2

In [None]:
# DataFrame object: a flat list becomes a single column labelled 0.
df = pd.DataFrame(data=[10, 20, 30, 40, 50, 60])

df

In [None]:
# Dimensions of the DataFrame as a (rows, columns) tuple
df.shape

In [None]:
# Column labels
df.columns

In [None]:
# Row labels (the index)
df.index

In [None]:
# Show the column labels and the row labels as plain Python lists.
print("These are the names of the columns", [*df.columns])
print("These are the row indices", [*df.index])

In [None]:
# Renaming columns: assign a list holding one new label per existing column
df.columns = ['V1']
df

In [None]:
# Replace the row labels with R1..R6 (one label per row).
df.index = [f'R{row}' for row in range(1, 7)]
df

In [None]:
# Creating a DataFrame with multiple columns: each inner list becomes one row.
df1 = pd.DataFrame(
    data=[
        [10, 15, 20],
        [100, 200, 300],
    ]
)
print("Shape of new data frame", df1.shape)

df1

In [None]:
# Same data as before, now with explicit column names and row labels.
df1 = pd.DataFrame(
    data=[[10, 15, 20], [100, 200, 300]],
    index=['R1', 'R2'],
    columns=['V1', 'V2', 'V3'],
)
df1

## Working with Local Files 

In [None]:
# Reading a CSV file.
# The path uses a forward slash, which works on Windows, macOS and Linux alike.
# The previous backslash form depended on the invalid string escape "\s"
# (reported as a warning by recent Python versions) and only resolved on Windows.
filename = 'data/student-por.csv'
# The delimiter is set explicitly because this file is read as semicolon-separated.
studentData = pd.read_csv(filename, delimiter=';')

In [None]:
# Inspect the loaded data: dimensions, row index and column labels.
# Every value is printed explicitly: a notebook cell only auto-displays its
# final expression, so a bare `studentData.index` in the middle of the cell
# produces no output at all.
print(studentData.shape)
print(studentData.index)
print(studentData.columns)

In [None]:
# Preview the first rows of the data (head() returns five rows by default)
studentData.head()

#### Writing CSV
##### df.to_csv(filepath)

In [None]:
# Data types: one dtype per column
studentData.dtypes

In [None]:
# Summary of the DataFrame: columns, non-null counts, dtypes and memory usage
studentData.info()

In [None]:
# Converting a column's data type: cast 'Medu' to float, then print the
# summary again to confirm the new dtype.
studentData['Medu'] = studentData['Medu'].astype(float)
studentData.info()

### Data Selection 

In [None]:
# Data extraction: select a single column by its name.
# NOTE: despite the variable name, single-bracket selection yields a Series,
# not a DataFrame.
ageDf = studentData['age']
ageDf

In [None]:
# Extracting multiple columns from a DataFrame: pass a list of column names
# (note the double brackets); the result is a DataFrame.
studentSubset1 = studentData[['age','address','famsize']] 
studentSubset1

In [None]:
# Select rows and columns together with .loc[rows, columns].
# .loc slices by label and includes the end label, so with the default
# integer index ':25' selects labels 0 through 25, i.e. 26 rows.
studentSubset2 = studentData.loc[:25,['age','address','famsize']]
studentSubset2.shape

In [None]:
# Data transformation: group the rows by 'famsize' and count the entries
# in each group
studentData.groupby(['famsize'])['famsize'].agg('count')

In [None]:
# Store the per-group counts (number of students per family size) for reuse.
aggData = studentData.groupby(by=['famsize'])['famsize'].agg(func='count')
aggData

In [None]:
# Split the aggregated Series into group labels (x) and their counts (y).
x = [*aggData.index]
y = aggData.values

In [None]:
# pyplot is matplotlib's state-based plotting interface
import matplotlib.pyplot as plt 
# IPython magic (not plain Python): render figures inline in the notebook output
%matplotlib inline 
# Use the 'ggplot' style sheet for the figures
plt.style.use('ggplot')


In [None]:
# Plotting the data: one bar per family-size group, bar height = student count
plt.bar(x, y, color='maroon') 
# Axis labels and title
plt.xlabel("Family Sizes") 
plt.ylabel("Count of Students ")
plt.title("Distribution of students against family sizes") 
# Render the finished figure
plt.show()

### Time Series

In [None]:
# Parse a free-form date string into a pandas Timestamp
date_1 = pd.to_datetime('15th of January, 2021')
print(date_1)

In [None]:
# Shift the date forward by 25 days.
newdate = date_1 + pd.Timedelta(days=25)
print(newdate)

In [None]:
# Getting all dates within a window: 7 consecutive daily timestamps
# starting at newdate
futuredate = pd.date_range(start = newdate, periods = 7, freq = 'D')
futuredate

In [None]:
# Code optimization
# apply() and lambda functions
# A named helper that can be handed to apply()
def add5(x):
    """Return ``x`` increased by 5.

    ``x`` can be any object that supports addition with an integer.
    """
    increment = 5
    return x + increment

In [None]:
# Using apply: add5 is called on each of the selected grade columns,
# so 5 is added to every value
df_t = studentData[['G1', 'G2','G3']].apply(add5)
df_t.head()

In [None]:
# Same transformation written with an anonymous (lambda) function
# instead of the named add5 helper
df_t2 = studentData[['G1','G2','G3']].apply(lambda x:x+5) 
df_t2.head()

### Utility Functions

In [None]:
# Utility functions
import numpy as np
# Draw 10 random numbers from a normal distribution with mean 2 and SD 1
# (no seed is set in this cell)
np.random.normal(2.0,1,10)

#### Function Workings

In [None]:
import random

In [None]:
# Fix the random seed so the generated numbers are reproducible.
np.random.seed(123)
# Create three Series of 100 normally distributed values each; the
# (mean, standard deviation) pairs are consumed in order, so the draws
# happen in the sequence ser1, ser2, ser3.
ser1, ser2, ser3 = (
    pd.Series(np.random.normal(loc=mean, scale=sd, size=100))
    for mean, sd in ((3.0, 1), (5.0, 3), (1.0, 0.5))
)

In [None]:
# Place the three Series side by side (axis='columns' is the same as axis=1),
# then give the resulting columns readable names.
Df = pd.concat([ser1, ser2, ser3], axis='columns')
Df.columns = ['V1', 'V2', 'V3']
Df.head()

In [None]:
# axis=0: add up the values down the rows, giving one total per column
Df.sum(axis=0)

In [None]:
# axis=0: take all the row values of each column and compute their mean
# (one mean per column)
Df.mean(axis=0)

In [None]:
# axis=1: compute the mean across the columns, giving one mean per row
Df.mean(axis=1)

In [None]:
# Apply the divmod function to each column: the lambda receives a whole
# Series, and divmod(x, 3) yields its quotient and remainder parts
Df.apply(lambda x: divmod(x,3))

In [None]:
# Element-wise application: the lambda receives one value at a time and the
# result is a DataFrame of (quotient, remainder) tuples.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map, so use `map` when it exists and fall back to `applymap`
# on older pandas versions.
(Df.map if hasattr(Df, 'map') else Df.applymap)(lambda x: divmod(x, 3))

In [None]:
# List comprehension: walk over the string and collect each character.
letters = [char for char in 'Pandas']
print(letters)

In [None]:
# Create a list of lists of numeric strings: each (value, repeats) pair
# expands to `repeats` copies of `value`.
list1 = [
    [value] * repeats
    for value, repeats in (('20', 10), ('35', 15), ('40', 10), ('10', 25), ('15', 40))
]
# Flatten the nested lists into a single list using a list comprehension.
charlist = [item for group in list1 for item in group]
# Display the number of elements in the flattened list.
len(charlist)

General syntax of a list comprehension: newlist = [expression _for_ item _in_ iterable _if_ condition == True]

In [None]:
# Randomly shuffle the list in place; the seed is fixed first so the
# shuffled order is reproducible
random.seed(123)
random.shuffle(charlist)
# Convert the list to a Series
ser4 = pd.Series(charlist) 
ser4

In [None]:
# Convert the string values of the Series to a numeric dtype
ser4 = pd.to_numeric(ser4)
ser4

In [None]:
# Append ser4 as a new column; it has no name, so the new column is labelled 0
Df = pd.concat([Df,ser4],axis=1) 
# Rename the new column from 0 to 'V4'
Df.rename(columns={0:'V4'}, inplace=True)
# inplace=True modifies Df directly; it is equivalent to:
#     Df = Df.rename(columns={0: 'V4'})
# Displaying the data frame
Df

In [None]:
# Convert the DataFrame to a NumPy array (row and column labels are dropped)
numpArray = Df.to_numpy()
numpArray

## Data Modeling 
Imputation, Scaling, and Normalization