# New Columns
To create a new column from YOUR own equation, assign the result of that expression to a new column name on the DataFrame (for example, `df['new_column'] = df['existing_column'] + 4`).

In [1]:
import pandas as pd
import numpy as np

# Seed the generator so that a fresh "Restart Kernel -> Run All" reproduces
# exactly the same sample data (and therefore the same displayed outputs).
SEED = 42
rng = np.random.default_rng(SEED)

dataset = {
    'column1': rng.random(10),  # 10 random FLOATS in the half-open range [0, 1)
    'column2': rng.integers(0, 100, 10),  # 10 random INTEGERS between 0 and 99
    'column3': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C', 'A']  # 10 categorical values
}

# Each dict key becomes a column; the default 0..9 integer index is used.
sample_df = pd.DataFrame(dataset)
sample_df

Unnamed: 0,column1,column2,column3
0,0.471112,1,A
1,0.614496,20,B
2,0.088595,87,C
3,0.604786,31,A
4,0.31985,11,B
5,0.073182,26,C
6,0.565403,66,A
7,0.061892,18,B
8,0.719809,47,C
9,0.022883,31,A


In [7]:
# Recommended pattern: compute the values from an existing column and assign
# the resulting Series directly to a new column label.
sample_df['new_column'] = sample_df['column2'].add(4)
sample_df

Unnamed: 0,column1,column2,column3,new_column
0,0.956968,82,A,86
1,0.751426,55,B,59
2,0.003287,20,C,24
3,0.297442,19,A,23
4,0.54347,95,B,99
5,0.371001,90,C,94
6,0.324466,86,A,90
7,0.837255,91,B,95
8,0.008193,71,C,75
9,0.255823,41,A,45


# Chaining sorting and subsetting with new columns

In [13]:
# Step 1 - derive a new column as the element-wise sum of two existing columns.
sample_df['NEWEST_COL'] = sample_df['column2'].add(sample_df['new_column'])

# Step 2 - keep only the rows whose NEWEST_COL is strictly greater than 90.
above_threshold = sample_df['NEWEST_COL'].gt(90)
highest_value = sample_df.loc[above_threshold]

# Step 3 - order by column1 (descending), then by NEWEST_COL (ascending).
sorted_values = highest_value.sort_values(
    by=['column1', 'NEWEST_COL'],
    ascending=[False, True],
)

# Step 4 - narrow the result down to the two columns of interest.
specific_values = sorted_values.loc[:, ['column1', 'NEWEST_COL']]

specific_values

Unnamed: 0,column1,NEWEST_COL
0,0.956968,168
7,0.837255,186
1,0.751426,114
4,0.54347,194
5,0.371001,184
6,0.324466,176
8,0.008193,146


# Dropping a Column

In [3]:
# EXAMPLE

# (Re)create the column first so the drop below always has a column to remove.
sample_df['new_column'] = sample_df['column2'] + 4

# drop() returns a new DataFrame without the listed columns; rebinding the
# name gives the same end state as inplace=True without mutating in place.
# Note: a bare `sample_df` placed before this line would not be rendered,
# because a notebook cell only displays its final expression.
sample_df = sample_df.drop(columns=['new_column'])
sample_df

Unnamed: 0,column1,column2,column3
0,0.471112,1,A
1,0.614496,20,B
2,0.088595,87,C
3,0.604786,31,A
4,0.31985,11,B
5,0.073182,26,C
6,0.565403,66,A
7,0.061892,18,B
8,0.719809,47,C
9,0.022883,31,A
