# Python Pandas General Useful Functions

In [4]:
# Build a small example DataFrame for the examples that follow
import pandas as pd

# Assemble the column data one key at a time; each list holds that
# column's values in row order
data = {}
data['Name'] = ['Alice', 'Bob', 'Charlie']
data['Age'] = [25, 30, 35]
data['City'] = ['New York', 'Los Angeles', 'Chicago']

# Each dict key becomes a column of the frame
df = pd.DataFrame(data)

print(df)

      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago


## len(df) 

Returns the number of rows in the DataFrame. Alternative: `df.shape[0]`.

In [5]:

# `df` is the DataFrame created in the setup cell at the top of the notebook.
# Calling len() on a DataFrame gives its number of rows.
num_rows = len(df)

print("Number of rows in the DataFrame:", num_rows)

# Equivalent alternative: df.shape[0] (the first entry of shape is the row count)

Number of rows in the DataFrame: 3


## len(df.columns) 

Returns the number of columns in the DataFrame. Alternative: `df.shape[1]`.

In [6]:
# `df` is the DataFrame created in the setup cell at the top of the notebook.
# df.columns holds the column labels, so its length is the number of columns.
# Equivalent alternative: df.shape[1]
num_columns = len(df.columns)

print("Number of columns in the DataFrame:", num_columns)

Number of columns in the DataFrame: 3


## df.insert() 

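The example below uses `df.insert()` to add a new column named "Category" to the DataFrame `df` at column position 1 (zero-based, i.e. immediately after "Name"), filled with the values "A", "B", and "C". Note that `df.insert()` modifies `df` in place rather than returning a new DataFrame.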

In [7]:
# `df` is the DataFrame created in the setup cell (columns: Name, Age, City).
# insert() modifies `df` in place and raises ValueError when the column label
# already exists, so guard the call to keep this cell safe to re-run.
if "Category" not in df.columns:
    # Arguments: zero-based column position, new column label, column values.
    df.insert(1, "Category", ["A", "B", "C"])

print(df)

      Name Category  Age         City
0    Alice        A   25     New York
1      Bob        B   30  Los Angeles
2  Charlie        C   35      Chicago


## df.drop_duplicates(subset)

Here's a code stub using df.drop_duplicates(subset) to drop duplicates from a subset of a pandas DataFrame:

In [8]:
# Sample data in which 'Alice' appears twice in the 'Name' column
data = {
    'Name': ['Alice', 'Bob', 'Alice', 'Charlie'],
    'Age': [25, 30, 25, 35],
}

# Build the frame, then keep only the first row seen for each distinct 'Name'
# (the surviving rows keep their original index labels)
df = pd.DataFrame(data).drop_duplicates(subset=['Name'])

print(df)

      Name  Age
0    Alice   25
1      Bob   30
3  Charlie   35


This code will remove duplicate rows based on the values in the 'Name' column. You can specify other columns in the subset argument to drop duplicates based on those columns as well.

## df.dropna()

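Drops rows that contain missing values. Useful parameters: `subset` (a list of columns to check for missing values) and `inplace` (defaults to `False`, so a new DataFrame is returned).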

In [None]:
import pandas as pd

# Sample data in which every column contains one missing entry (None)
data = {
    'A': [1, 2, None, 4],
    'B': ['a', 'b', None, 'd'],
    'C': [10, 20, 30, None],
}
df = pd.DataFrame(data)

# Remove every row (axis=0) that has at least one missing value (how='any');
# these are the defaults, spelled out here for clarity. `df` itself is untouched.
df_dropped = df.dropna(axis=0, how='any')

print(df_dropped)

This code will output:

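     A  B     C
0  1.0  a  10.0
1  2.0  b  20.0

As you can see, the rows containing missing values (NaN) have been dropped from the resulting DataFrame df_dropped. Columns A and C are displayed as floats because a numeric column that contains a missing value is stored as float64.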

You can also use the how parameter of df.dropna() to specify whether to drop rows with any missing values (how='any') or only rows with all missing values (how='all').

## df.rename(dict)

Renames columns according to a dictionary that maps old column names to new column names.

In [1]:
import pandas as pd

# Mapping from each existing column label to its replacement.
# NOTE(review): 'Year of Birth' is only a demo label; the values in that
# column are the ages from 'Age', not birth years.
new_names = {'Name': 'First Name', 'Age': 'Year of Birth', 'City': 'Location'}

# Sample data under the original column names
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Los Angeles', 'Chicago'],
}

# Build the frame and relabel its columns in one chained expression
df = pd.DataFrame(data).rename(columns=new_names)

print(df)

  First Name  Year of Birth     Location
0      Alice             25     New York
1        Bob             30  Los Angeles
2    Charlie             35      Chicago


This code will output:

  First Name  Year of Birth     Location
0      Alice             25     New York
1        Bob             30  Los Angeles
2    Charlie             35      Chicago

As you can see, the column names have been successfully renamed according to the specified dictionary.

## df.fillna()

## pd.concat([df list], axis=0 or 1)

## df.pivot(index, columns, values)

## pd.melt(df, id_vars, value_vars, var_name)

Unpivots a DataFrame from wide format to long format.

## df.sort_values(column, inplace, ascending)