# DataFrame operations

In [2]:
# value_counts(): tally how many times each distinct value appears in a column.
import pandas as pd

data = {'Category': ['A', 'B', 'A', 'C', 'A', 'B', 'B', 'C']}
df = pd.DataFrame(data)

# Pull the 'Category' column out first, then count its distinct values.
category_column = df['Category']
value_counts = category_column.value_counts()
print("Value Counts:\n", value_counts)

Value Counts:
 Category
A    3
B    3
C    2
Name: count, dtype: int64


In [4]:
# map(): translate each value of a Series through a lookup table.

# Lookup table from category code to fruit name.
mapping = {'A': 'Apple', 'B': 'Banana', 'C': 'Cherry'}

# Look up every code in the 'Category' column; the result is a new Series
# and `df` itself is not modified.
category_codes = df['Category']
result_map = category_codes.map(mapping)
print("Result of map:\n", result_map)

Result of map:
 0     Apple
1    Banana
2     Apple
3    Cherry
4     Apple
5    Banana
6    Banana
7    Cherry
Name: Category, dtype: object


In [6]:
# apply(): run a function over every column of a DataFrame.
import pandas as pd


def multiply_by_2(x):
    """Return ``x`` multiplied by 2."""
    doubled = x * 2
    return doubled


data = {
    'A': [1, 2, 3, 4],
    'B': [10, 20, 30, 40],
    'C': [100, 200, 300, 400],
}
df = pd.DataFrame(data)

# No axis is given, so apply() hands the function one column at a time.
df_applied = df.apply(multiply_by_2)
print(df_applied)

   A   B    C
0  2  20  200
1  4  40  400
2  6  60  600
3  8  80  800


In [7]:
# Apply a Function to Each Row
def sum_row(row):
    """Return the sum of the values in ``row``.

    With ``axis=1``, ``DataFrame.apply`` calls this once per row.
    """
    return row.sum()


# Sum only the source columns. Leaving out any existing 'Row_Sum' column keeps
# this cell idempotent: re-running it would otherwise add the previous total
# into the new one (111 would become 222, and so on).
source_columns = df.drop(columns='Row_Sum', errors='ignore')
df['Row_Sum'] = source_columns.apply(sum_row, axis=1)

print(df)

   A   B    C  Row_Sum
0  1  10  100      111
1  2  20  200      222
2  3  30  300      333
3  4  40  400      444


In [8]:
# groupby(): split rows into groups by key, then aggregate each group.
import pandas as pd

data = {
    'Category': ['A', 'A', 'B', 'B', 'C', 'C', 'C'],
    'Values': [100, 150, 200, 250, 300, 350, 400],
}
df = pd.DataFrame(data)

# Bucket the rows by 'Category', then total the 'Values' inside each bucket.
values_by_category = df.groupby('Category')['Values']
grouped = values_by_category.sum()

print(grouped)

Category
A     250
B     450
C    1050
Name: Values, dtype: int64


In [10]:
# unique(): return the distinct values of a Series as an array.
import pandas as pd

data = {
    'Category': ['A', 'A', 'B', 'B', 'C', 'C', 'C'],
    'Values': [100, 150, 200, 250, 300, 350, 400],
}
df = pd.DataFrame(data)

# Distinct entries of the 'Category' column.
category_column = df['Category']
unique_categories = category_column.unique()
print(unique_categories)


['A' 'B' 'C']


In [11]:
# nunique(): count how many distinct values the 'Category' column holds.
category_column = df['Category']
num_unique_categories = category_column.nunique()
print(num_unique_categories)

3


In [12]:
# Called on the whole DataFrame, nunique() reports one distinct-value count
# per column (axis=0 is the default, spelled out here for clarity).
num_unique_each_column = df.nunique(axis=0)
print(num_unique_each_column)

Category    3
Values      7
dtype: int64


In [13]:
# Handling missing values
# unique() keeps the missing-value marker in its result, whereas nunique()
# leaves missing values out of its count unless dropna=False is passed.
# This cell demonstrates the unique() half of that difference.
data_with_nan = {
    'Category': ['A', 'A', 'B', 'B', 'C', 'C', None],
    'Values': [100, 150, 200, 250, 300, 350, 400],
}
df_with_nan = pd.DataFrame(data_with_nan)

# The trailing None in 'Category' appears as its own entry in the result.
category_with_missing = df_with_nan['Category']
unique_categories_with_nan = category_with_missing.unique()
print(unique_categories_with_nan)

['A' 'B' 'C' None]


In [15]:
# sort_values(): order the rows of a DataFrame by a single column.
import pandas as pd

data = {
    'Category': ['A', 'C', 'B', 'A', 'C', 'B', 'A'],
    'Values': [100, 300, 200, 150, 350, 250, 400],
}
df = pd.DataFrame(data)

# Largest 'Values' first; every row keeps its original index label.
sorted_df = df.sort_values('Values', ascending=False)
print(sorted_df)

  Category  Values
6        A     400
4        C     350
1        C     300
5        B     250
2        B     200
3        A     150
0        A     100


In [18]:
# merge(): combine two DataFrames on a shared key column.
import pandas as pd

left_data = {
    'Key': ['A', 'B', 'C', 'D'],
    'Value1': [1, 2, 3, 4],
}
right_data = {
    'Key': ['B', 'D', 'E', 'F'],
    'Value2': [5, 6, 7, 8],
}
df1 = pd.DataFrame(left_data)
df2 = pd.DataFrame(right_data)

# No `how` is given, so this is an inner join: only keys present in both
# frames ('B' and 'D') make it into the result.
merged_df = df1.merge(df2, on='Key')
print(merged_df)

  Key  Value1  Value2
0   B       2       5
1   D       4       6


In [19]:
# describe(): summary statistics (count, mean, std, min, quartiles, max)
# for each column of the DataFrame.
import pandas as pd

data = {
    'A': [1, 2, 3, 4, 5],
    'B': [10, 20, 30, 40, 50],
    'C': [100, 200, 300, 400, 500],
}
df = pd.DataFrame(data)

# One row per statistic, one column per input column.
desc_stats = df.describe()
print("Descriptive Statistics:\n", desc_stats)

Descriptive Statistics:
               A          B           C
count  5.000000   5.000000    5.000000
mean   3.000000  30.000000  300.000000
std    1.581139  15.811388  158.113883
min    1.000000  10.000000  100.000000
25%    2.000000  20.000000  200.000000
50%    3.000000  30.000000  300.000000
75%    4.000000  40.000000  400.000000
max    5.000000  50.000000  500.000000


In [20]:
# replace(): swap specific values for new ones.
import pandas as pd

data = {
    'Category': ['A', 'B', 'C', 'A', 'B', 'C'],
    'Values': [1, 2, 3, 4, 5, 6],
}
df = pd.DataFrame(data)

# Nested mapping of {column: {old value: new value}}: only 'A' entries in the
# 'Category' column become 'X'. The result is a new frame; `df` is unchanged.
replacements = {'Category': {'A': 'X'}}
df_replaced = df.replace(replacements)
print(df_replaced)

  Category  Values
0        X       1
1        B       2
2        C       3
3        X       4
4        B       5
5        C       6


In [21]:
# rename(): give columns new labels.
import pandas as pd

data = {
    'A': [1, 2, 3, 4],
    'B': [10, 20, 30, 40],
    'C': [100, 200, 300, 400],
}
df = pd.DataFrame(data)

# Old label -> new label. Column 'C' is not listed, so it keeps its name.
# The result is a new frame; `df` keeps its original labels.
new_column_names = {'A': 'Alpha', 'B': 'Beta'}
df_renamed = df.rename(columns=new_column_names)
print(df_renamed)

   Alpha  Beta    C
0      1    10  100
1      2    20  200
2      3    30  300
3      4    40  400
