# Pandas Operations

- In this file you will find all the basic and common opearation performed with the help pf Pandas.

In [1]:
import pandas as pd
import numpy as np

### Create a Series

- There are multiple ways by which we can create the Series
1. using list
2. using numpy array
3. using dictionary
4. from scalar value
5. from a range

In [3]:
# Using list
data_list = [1,2,3,4,5]
series_from_list = pd.Series(data_list)
print(series_from_list)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [4]:
# Using Numpy array
numpy_array = np.array([1,3,2,4,5,5])
series_from_array = pd.Series(numpy_array)
print(series_from_array)

0    1
1    3
2    2
3    4
4    5
5    5
dtype: int32


In [5]:
# From Dictionary
data_dict = {'a':1,'b':2,'c':3}
series_from_dict = pd.Series(data_dict)
print(series_from_dict)

a    1
b    2
c    3
dtype: int64


In [6]:
# From scalar value
scalar_value = 5
index_label = ['a','b','c','d','e']
series_from_scalar = pd.Series(scalar_value, index=index_label)
print(series_from_scalar)

a    5
b    5
c    5
d    5
e    5
dtype: int64


In [7]:
# From a Range
start = 0
stop = 50
step = 10
series_range = pd.Series(pd.RangeIndex(start,stop,step))
print(series_range)

0     0
1    10
2    20
3    30
4    40
dtype: int64


### Create the DataFrame

- There are multiple ways to create the `DataFrame`
1. From Dictionary of list or arrays
2. From lists of dictionary
3. From list of lists or arrays with column and Index Labels


In [8]:
# From dictionary of lists or arrays
data = {'Name': ['John', 'Emma', 'James', 'Olivia', 'William'],
        'Age': [25, 30, 35, 40, 45],
        'Gender': ['Male', 'Female', 'Male', 'Female', 'Male']}
df_from_dict = pd.DataFrame(data)
print(df_from_dict)

      Name  Age  Gender
0     John   25    Male
1     Emma   30  Female
2    James   35    Male
3   Olivia   40  Female
4  William   45    Male


In [9]:
# From list of dictionary

data = [{'Name': 'John', 'Age': 25, 'Gender': 'Male'},
        {'Name': 'Emma', 'Age': 30, 'Gender': 'Female'},
        {'Name': 'James', 'Age': 35, 'Gender': 'Male'},
        {'Name': 'Olivia', 'Age': 40, 'Gender': 'Female'},
        {'Name': 'William', 'Age': 45, 'Gender': 'Male'}]
df_from_list_of_dicts = pd.DataFrame(data)
print(df_from_list_of_dicts)

      Name  Age  Gender
0     John   25    Male
1     Emma   30  Female
2    James   35    Male
3   Olivia   40  Female
4  William   45    Male


In [10]:
# From list of lists or arrays with column and Index labels

data = [['John', 25, 'Male'],
        ['Emma', 30, 'Female'],
        ['James', 35, 'Male'],
        ['Olivia', 40, 'Female'],
        ['William', 45, 'Male']]
columns = ['Name', 'Age', 'Gender']
index = ['A', 'B', 'C', 'D', 'E']
df_from_list_of_lists = pd.DataFrame(data, columns=columns, index=index)
print(df_from_list_of_lists)


      Name  Age  Gender
A     John   25    Male
B     Emma   30  Female
C    James   35    Male
D   Olivia   40  Female
E  William   45    Male


### Add rows in the existing Dataframe
- There are multiple ways to create the Dataframe
1. using appending rows to dataframe
    - As this append function is going to be removed from the latest version of the pandas library, it is suggested that no to use this function
2. Using loc method
2. using concat

In [27]:
# We can add multiple rows by using this append function

# Existing DataFrame
data = {'Name': ['John', 'Emma', 'James'],
        'Age': [25, 30, 35],
        'Gender': ['Male', 'Female', 'Male']}
df = pd.DataFrame(data)

# Dictionary representing the new row
new_row = {'Name': 'Olivia', 'Age': 40, 'Gender': 'Female'}

# Appending the new row
df = df.append(new_row, ignore_index=True)
print(df)

     Name  Age  Gender
0    John   25    Male
1    Emma   30  Female
2   James   35    Male
3  Olivia   40  Female


  df = df.append(new_row, ignore_index=True)


In [28]:
import pandas as pd

# Existing DataFrame
data = {'Name': ['John', 'Emma', 'James'],
        'Age': [25, 30, 35],
        'Gender': ['Male', 'Female', 'Male']}
df = pd.DataFrame(data)

# Adding a row using loc
df.loc[3] = ['Olivia', 40, 'Female']
print(df)


     Name  Age  Gender
0    John   25    Male
1    Emma   30  Female
2   James   35    Male
3  Olivia   40  Female


In [29]:
# Using the concat function

# Existing DataFrame
data = {'Name': ['John', 'Emma', 'James'],
        'Age': [25, 30, 35],
        'Gender': ['Male', 'Female', 'Male']}
df = pd.DataFrame(data)

# Dictionary representing the new row
new_row = {'Name': ['Olivia'], 'Age': [40], 'Gender': ['Female']}
temp = pd.DataFrame(new_row)

# Concating the dataframe
df = pd.concat([df, temp])
print(df)

     Name  Age  Gender
0    John   25    Male
1    Emma   30  Female
2   James   35    Male
0  Olivia   40  Female


### Data Loading and Saving

- Load the SQL data

In [None]:
# Laoding the SQL data into pandas dataframe
import pandas as pd
import sqlite3

# Establishing a connection to the SQL database
conn = sqlite3.connect('your_database.db')  # Replace 'your_database.db' with the path to your SQLite database
# If you are using a different SQL database like MySQL, PostgreSQL, etc.,
# you would need to use appropriate connection parameters and libraries for that specific database.

# Writing your SQL query
query = "SELECT * FROM your_table;"  # Replace 'your_table' with the name of the table you want to query

# Loading data into a DataFrame
df = pd.read_sql(query, conn)

# Closing the database connection
conn.close()

# Displaying the DataFrame
print(df)


- Load the html file

In [None]:
import pandas as pd

# Reading HTML file into a list of DataFrames
dfs = pd.read_html('your_file.html')  # Replace 'your_file.html' with the path to your HTML file

# Accessing the DataFrame(s) from the list
for i, df in enumerate(dfs):
    print(f"DataFrame {i + 1}:")
    print(df)
    print()


- Load the json file

In [None]:
import pandas as pd

# Reading JSON file into a DataFrame
df = pd.read_json('your_file.json')  # Replace 'your_file.json' with the path to your JSON file

# Displaying the DataFrame
print(df)

### Reshape the data

- Pivot table

**pivot(): Reshape data based on column values.**

In [30]:
# Creating a DataFrame
data = {'Date': ['2022-01-01', '2022-01-02', '2022-01-03'],
        'City': ['New York', 'Los Angeles', 'Chicago'],
        'Temperature': [32, 75, 50]}
df = pd.DataFrame(data)

# Reshaping using pivot
pivot_df = df.pivot(index='Date', columns='City', values='Temperature')
print(pivot_df)

City        Chicago  Los Angeles  New York
Date                                      
2022-01-01      NaN          NaN      32.0
2022-01-02      NaN         75.0       NaN
2022-01-03     50.0          NaN       NaN


**pivot_table(): Reshape data while handling duplicate entries by aggregating values.**

In [32]:
# Using pivot_table
pivot_table_df = df.pivot_table(index='Date', columns='City', values='Temperature', aggfunc='mean')
print(pivot_table_df)

City        Chicago  Los Angeles  New York
Date                                      
2022-01-01      NaN          NaN      32.0
2022-01-02      NaN         75.0       NaN
2022-01-03     50.0          NaN       NaN


**stack(): Reshape data by pivoting the innermost level of column labels.**

In [33]:
# Using stack
stacked_df = pivot_df.stack()
print(stacked_df)

Date        City       
2022-01-01  New York       32.0
2022-01-02  Los Angeles    75.0
2022-01-03  Chicago        50.0
dtype: float64


**unstack(): Reverse the operation of stack, pivoting the innermost level of row labels**

In [35]:
# Using unstack
unstacked_df = stacked_df.unstack()
print(unstacked_df)

City        Chicago  Los Angeles  New York
Date                                      
2022-01-01      NaN          NaN      32.0
2022-01-02      NaN         75.0       NaN
2022-01-03     50.0          NaN       NaN


### Concat

In [37]:
# Creating two DataFrames
df1 = pd.DataFrame({'A': [1, 2, 3],
                    'B': [4, 5, 6]})
df2 = pd.DataFrame({'A': [7, 8, 9],
                    'B': [10, 11, 12]})

# Concatenating along rows
result = pd.concat([df1, df2], axis=0)
print(result)

   A   B
0  1   4
1  2   5
2  3   6
0  7  10
1  8  11
2  9  12


### Merge

- Merge function is based on the column's values. It allows you to merge the dataframe based on common columns or columns with different names.

- Inner Merge : An inner merge returns only the rows that have matching values in both DataFrames.

In [39]:
# Creating two DataFrames
df1 = pd.DataFrame({'key': ['A', 'B', 'C', 'D'],
                    'value': [1, 2, 3, 4]})
df2 = pd.DataFrame({'key': ['B', 'D', 'E', 'F'],
                    'value': [5, 6, 7, 8]})

# Performing inner merge
inner_merge = pd.merge(df1, df2, on='key', how='inner')
print(inner_merge)


  key  value_x  value_y
0   B        2        5
1   D        4        6


- Left merge : A left merge returns all the rows from the left DataFrame and the matched rows from the right DataFrame. If there is no match, NaN values are filled.

In [40]:
# Performing left merge
left_merge = pd.merge(df1, df2, on='key', how='left')
print(left_merge)

  key  value_x  value_y
0   A        1      NaN
1   B        2      5.0
2   C        3      NaN
3   D        4      6.0


- Right Merge : A right merge returns all the rows from the right DataFrame and the matched rows from the left DataFrame. If there is no match, NaN values are filled.

In [41]:
# Performing right merge
right_merge = pd.merge(df1, df2, on='key', how='right')
print(right_merge)


  key  value_x  value_y
0   B      2.0        5
1   D      4.0        6
2   E      NaN        7
3   F      NaN        8


- Outer Merge : An outer merge returns all the rows from both DataFrames and fills in NaN values for missing matches.

In [43]:
# Performing outer merge
outer_merge = pd.merge(df1, df2, on='key', how='outer')
print(outer_merge)


  key  value_x  value_y
0   A      1.0      NaN
1   B      2.0      5.0
2   C      3.0      NaN
3   D      4.0      6.0
4   E      NaN      7.0
5   F      NaN      8.0


### Join

- Join is based on the indexes of the dataframe. It allows you to join the Dataframes based on theie Indexes