# Lecture 26. Complete Pandas from Scratch - Part 1

In [1]:
!pip install pandas



In [3]:
# Import pandas under its conventional alias and display the installed version
import pandas as pd
pd.__version__

'2.2.1'

In [5]:
# Import NumPy under its conventional alias and display the installed version
import numpy as np
np.__version__

'1.26.4'

## Creating DataFrames
1. What is a dataframe?
2. How is it related to other data storage options?
   
A DataFrame is a fundamental data structure in pandas, a Python library designed for data manipulation and analysis.
Essentially, a DataFrame represents a table of data with rows and columns, similar to a spreadsheet, a SQL table, or the contents of a CSV file. Each column in a DataFrame holds data of a single data type, but different columns can hold different types of data (e.g., integers, floats, strings, Python objects, etc.).

In [None]:
# Build a DataFrame from a dict: every key becomes a column label and
# the list under it supplies that column's values.
data = {
    'column A': [3, 4, 5, 6],
    'column B': [5, 8, 9, 0],
}
df = pd.DataFrame(data=data)
print(df)

In [None]:
# Build a DataFrame from a list of dicts: each dict is one row,
# and its keys become the column labels.
data = [
    dict(Column1=1, Column2='A'),
    dict(Column1=2, Column2='B'),
    dict(Column1=3, Column2='C'),
]
df = pd.DataFrame(data=data)
print(df)


In [None]:
# Rows given as dicts do not need identical keys: only the first row
# supplies 'Column3', so the other rows are left without a value there.
data = [
    dict(Column1=1, Column2='A', Column3='Python'),
    dict(Column1=2, Column2='B'),
    dict(Column1=3, Column2='C'),
]
df = pd.DataFrame(data=data)
print(df)

In [None]:
# Build a one-dimensional Series, then show its contents and its Python type
series = pd.Series(data=[1, 2, 3])
print(series, type(series), sep="\n")

In [None]:
# Build a DataFrame from Series of different lengths: the shorter Series
# has no entry for the last row, so that cell is left missing.
numbers = pd.Series([1, 2, 3])
months = pd.Series(['Jan', 'Feb'])
data = {"Column1": numbers, "Column2": months}
df = pd.DataFrame(data=data)
print(df)

In [33]:
# Build a DataFrame from row lists; the column labels are passed separately.
columns = ['Sales', 'Day_Name']
data = [
    [100, 'Mon'],
    [200, 'Tue'],
    [300, 'Wed'],
]
df = pd.DataFrame(data=data, columns=columns)

# Show the frame, a separator line, then the dtype inferred for each column
print(df, "----------------", df.dtypes, sep="\n")

   Sales Day_Name
0    100      Mon
1    200      Tue
2    300      Wed
----------------
Sales        int64
Day_Name    object
dtype: object


In [35]:
# One-row frame whose columns each hold a different kind of value
mixed_columns = {
    "float": [1.0],
    "int": [1],
    "datetime": [pd.Timestamp('20240307')],
    "string": ['Praveen'],
}
df = pd.DataFrame(mixed_columns)

# Show the frame, a separator line, then the per-column dtypes
print(df, "----------------", df.dtypes, sep="\n")

   float  int   datetime   string
0    1.0    1 2024-03-07  Praveen
----------------
float              float64
int                  int64
datetime    datetime64[ns]
string              object
dtype: object


In [36]:
# Force a single dtype on every column through the `dtype` argument.
# 'Column A' begins with the string '1'; the recorded output shows it
# stored as int8 alongside the integer values.
data = {
    "Column A": ['1', 4, 5, 6],
    "Column B": [5, 8, 9, 0],
}
df = pd.DataFrame(data, dtype=np.int8)
print(df, "----------------", df.dtypes, sep="\n")


   Column A  Column B
0         1         5
1         4         8
2         5         9
3         6         0
----------------
Column A    int8
Column B    int8
dtype: object


In [37]:
# Shift every value in 'Column A' up by 10 by going through a NumPy array.
# The cell reads and then overwrites the same column, so each re-run adds
# another 10.
array = df['Column A'].to_numpy() + 10
df['Column A'] = array
print(df)


   Column A  Column B
0        11         5
1        14         8
2        15         9
3        16         0


In [38]:
# Wrap a 3x3 NumPy array in a DataFrame and label its columns 'A', 'B', 'C'
values = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
df2 = pd.DataFrame(data=values, columns=['A', 'B', 'C'])
print(df2)

   A  B  C
0  1  2  3
1  4  5  6
2  7  8  9


# Empty DataFrame

In [52]:
# A DataFrame built without any data has no rows and no columns
empty_df = pd.DataFrame(data=None)
print(empty_df)

Empty DataFrame
Columns: []
Index: []


In [58]:
# Assign a NumPy array to a new label to give the empty frame its first column
array = np.asarray([1, 2, 3])
empty_df['Array_Data'] = array
print(empty_df)


   Array_Data
0           1
1           2
2           3


In [59]:
# Build a 3x3 NumPy array; the following cells turn it into a DataFrame
matrix = np.array([
    [3, 4, 5],
    [6, 7, 8],
    [9, 0, 1],
])
print(matrix)

[[3 4 5]
 [6 7 8]
 [9 0 1]]


In [62]:
# With no `columns` argument the frame gets default integer column labels
df = pd.DataFrame(data=matrix)
df


Unnamed: 0,0,1,2
0,3,4,5
1,6,7,8
2,9,0,1


In [63]:
# Inspect the column labels of the frame built without explicit names
column_labels = df.columns
print(column_labels)


RangeIndex(start=0, stop=3, step=1)


In [64]:
# Same array as before, this time with explicit column labels
df = pd.DataFrame(data=matrix, columns=['A', 'B', 'C'])
df


Unnamed: 0,A,B,C
0,3,4,5
1,6,7,8
2,9,0,1


In [65]:
# Inspect the column labels now that names were supplied
column_labels = df.columns
print(column_labels)

Index(['A', 'B', 'C'], dtype='object')


# Creating a DataFrame with a Custom Index

In [74]:
# (pandas was already imported near the top of the notebook; repeating the
# import here is harmless.)
import pandas as pd

# Label both axes explicitly: `index` names the rows, `columns` names the columns
df = pd.DataFrame(
    data=[[1, 2], [4, 51], [7, 8]],
    columns=['max_speed', 'shield'],
    index=['cobra', 'viper', 'sidewinder'],
)
df

Unnamed: 0,max_speed,shield
cobra,1,2
viper,4,51
sidewinder,7,8


In [75]:
# Inspect the column labels of the frame
column_labels = df.columns
print(column_labels)


Index(['max_speed', 'shield'], dtype='object')


In [76]:
# Report the frame's dimensions as a (rows, columns) tuple
n_rows, n_cols = df.shape
print((n_rows, n_cols))

(3, 2)


In [77]:
# Summarise the frame: index range, columns, non-null counts, dtypes, memory.
# `info()` prints the report itself and returns None, so it must not be
# wrapped in print() -- doing so emits a stray "None" after the summary.
df.info()


<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, cobra to sidewinder
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   max_speed  3 non-null      int64
 1   shield     3 non-null      int64
dtypes: int64(2)
memory usage: 72.0+ bytes
None


# Manipulating Data

In [93]:
# Rebuild a 3x3 frame with columns 'A', 'B', 'C' (default row index)
# as the starting point for the column edits that follow.
matrix = np.array([
    [3, 4, 5],
    [6, 7, 8],
    [9, 0, 1],
])
df = pd.DataFrame(data=matrix, columns=['A', 'B', 'C'])
print(df)

   A  B  C
0  3  4  5
1  6  7  8
2  9  0  1


In [94]:
# Append column 'D'; the list supplies one value per row
new_values = [3, 4, 5]
df['D'] = new_values
print(df)

   A  B  C  D
0  3  4  5  3
1  6  7  8  4
2  9  0  1  5


In [95]:
# Overwrite column 'D' with new values and add a constant column 'E'.
# Use `np.nan`: the `np.NaN` alias was removed in NumPy 2.0 and raises
# AttributeError there. The missing value makes 'D' a float column, and the
# scalar 100 is repeated on every row of 'E'.
df['D'] = [0, 0, np.nan]
df['E'] = 100
print(df)

   A  B  C    D    E
0  3  4  5  0.0  100
1  6  7  8  0.0  100
2  9  0  1  NaN  100


In [96]:
# Add column 'F' with three random integers in the range 1..9 (the upper
# bound 10 is exclusive). A seeded Generator keeps the values identical
# across kernel restarts, so the notebook output is reproducible.
rng = np.random.default_rng(seed=42)
df['F'] = rng.integers(1, 10, size=3)
print(df)

   A  B  C    D    E  F
0  3  4  5  0.0  100  9
1  6  7  8  0.0  100  2
2  9  0  1  NaN  100  5


In [97]:
# Add column 'G' with one single-character string per row: 's', 't', 'r'
letters = ['s', 't', 'r']
df['G'] = letters
print(df)

   A  B  C    D    E  F  G
0  3  4  5  0.0  100  9  s
1  6  7  8  0.0  100  2  t
2  9  0  1  NaN  100  5  r


In [98]:
# Column 'A' already exists, so this assignment replaces its integer
# values with strings rather than adding a new column.
names = ["Mr", "Rahul", "Gandhi"]
df['A'] = names
print(df)



        A  B  C    D    E  F  G
0      Mr  4  5  0.0  100  9  s
1   Rahul  7  8  0.0  100  2  t
2  Gandhi  0  1  NaN  100  5  r


In [99]:
# Add column 'H' where every cell holds a Python list; the lists may have
# different lengths.
nested_values = [
    ["Praveen", 'is'],
    ["an", "Data Analyst"],
    ["Lucknow"],
]
df['H'] = nested_values
print(df)


        A  B  C    D    E  F  G                   H
0      Mr  4  5  0.0  100  9  s       [Praveen, is]
1   Rahul  7  8  0.0  100  2  t  [an, Data Analyst]
2  Gandhi  0  1  NaN  100  5  r           [Lucknow]


In [100]:
# List the dtype of every column after the edits made so far
column_types = df.dtypes
print(column_types)

A     object
B      int64
C      int64
D    float64
E      int64
F      int64
G     object
H     object
dtype: object


# Lecture 27. Complete Pandas from Scratch - Part 2: Delete Records, Slicing & Indexing in DataFrames

# Lecture 28. Pandas from Scratch - Part 3: Loc vs iLoc in DataFrames with Detailed Examples

# Lecture 29. Pandas from Scratch - Part 4: Sort Rows, Sort Columns & Rearrange Columns in DataFrames

# Lecture 30. How to Deal with Null Values in DataFrames: Pandas from Scratch - Part 5