**1. How can you create a pandas DataFrame from a dictionary?**

In [1]:
import pandas as pd

# Sample dictionary
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Los Angeles', 'Chicago']
}

# Creating DataFrame
df = pd.DataFrame(data)
df


Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,Los Angeles
2,Charlie,35,Chicago


**3. How do you get the first 5 rows of a DataFrame?**

In [2]:
# Sample DataFrame
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva', 'Frank'],
    'Age': [25, 30, 35, 40, 45, 50]
})

# Getting the first 5 rows
first_5_rows = df.head()
first_5_rows

Unnamed: 0,Name,Age
0,Alice,25
1,Bob,30
2,Charlie,35
3,David,40
4,Eva,45


**4. How can you select a single column from a DataFrame?**

In [3]:
# Sample DataFrame
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35]
})

# Selecting a single column
age_column = df['Age']
age_column

0    25
1    30
2    35
Name: Age, dtype: int64

**5. How do you select multiple columns from a DataFrame?**

In [4]:
# Sample DataFrame
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Los Angeles', 'Chicago']
})

# Selecting multiple columns
selected_columns = df[['Name', 'City']]
selected_columns

Unnamed: 0,Name,City
0,Alice,New York
1,Bob,Los Angeles
2,Charlie,Chicago


**8. How do you fill missing values in a DataFrame with a specific value?**

In [5]:
# Sample DataFrame with missing values
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', None],
    'Age': [25, None, 35]
})

# Filling missing values with a specific value
df_filled = df.fillna({'Name':'Charles','Age':30})  # You can replace 'Unknown' with any value you prefer
df_filled

Unnamed: 0,Name,Age
0,Alice,25.0
1,Bob,30.0
2,Charles,35.0


**9. How do you drop rows with missing values in a DataFrame?**

In [6]:
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', None],
    'Age': [25, None, 35]
})

# Dropping rows with missing values
df_dropped = df.dropna()
df_dropped

Unnamed: 0,Name,Age
0,Alice,25.0


**10.How can you group data in a DataFrame by a specific column?**

In [7]:
# Sample DataFrame
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 28, 35, 40],
    'Score': [85, 90, 88, 95, 80]
})

# Grouping data by the 'Name' column
grouped = df.groupby('Name')

# To see the result of the grouping, you can apply an aggregation function, such as mean
result = grouped.mean()
result


Unnamed: 0_level_0,Age,Score
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,26.5,86.5
Bob,32.5,92.5
Charlie,40.0,80.0


**11.How do you compute the mean of a column in a DataFrame?**

In [8]:
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35]
})

# Computing the mean of the 'Age' column
mean_age = df['Age'].mean()
print("Mean Age:", mean_age)


Mean Age: 30.0


**12.How can you add a new column to a DataFrame?**

In [9]:
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35]
})

# Adding a new column with a single value for all rows
df['City'] = 'Nwe York'

# Adding a new column with different values for each row
df['Score'] = [85, 90, 80]
df

Unnamed: 0,Name,Age,City,Score
0,Alice,25,Nwe York,85
1,Bob,30,Nwe York,90
2,Charlie,35,Nwe York,80


**13.How do you drop a column from a DataFrame?**

In [10]:
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Los Angeles', 'Chicago']
})

# Dropping a column
df.drop(columns='City', inplace=True)
df

Unnamed: 0,Name,Age
0,Alice,25
1,Bob,30
2,Charlie,35


**14.How can you sort a DataFrame by values in a specific column?**

In [11]:
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [55, 37, 59]
})

# Sorting by the 'Age' column in ascending order
df_sorted = df.sort_values(by=['Age','Name'])
df_sorted

Unnamed: 0,Name,Age
1,Bob,37
0,Alice,55
2,Charlie,59


**15.How do you reset the index of a DataFrame?**

In [12]:
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35]
}, index=['a', 'b', 'c'])

# Resetting the index
df_reset = df.reset_index(drop=True)
df_reset

Unnamed: 0,Name,Age
0,Alice,25
1,Bob,30
2,Charlie,35


**16.How can you merge two DataFrames based on a common column?**

In [13]:
df1 = pd.DataFrame({
    'ID': [1, 2, 3],
    'Name': ['Alice', 'Bob', 'Charlie']
})

df2 = pd.DataFrame({
    'ID': [2, 3, 4],
    'Age': [30, 35, 40]
})

# Merging DataFrames on the 'ID' column
merged_df = pd.merge(df1, df2, on='ID')
merged_df

Unnamed: 0,ID,Name,Age
0,2,Bob,30
1,3,Charlie,35


**17.How do you join two DataFrames on index?**

In [14]:
df1 = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie']
}, index=[0, 1, 2])

df2 = pd.DataFrame({
    'Age': [25, 30, 35]
}, index=[1, 2, 3])

# Joining DataFrames on index
joined_df = df1.join(df2)
joined_df

Unnamed: 0,Name,Age
0,Alice,
1,Bob,25.0
2,Charlie,30.0


**18.How do you pivot a DataFrame to reshape it?**

In [15]:
df = pd.DataFrame({
    'Date': ['2024-01-01', '2024-01-01', '2024-01-02', '2024-01-02'],
    'City': ['New York', 'Los Angeles', 'New York', 'Los Angeles'],
    'Temperature': [55, 75, 50, 70]
})
df

# Pivoting the DataFrame
pivoted_df = df.pivot(index='Date', columns='City', values='Temperature')
pivoted_df

City,Los Angeles,New York
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-01-01,75,55
2024-01-02,70,50


**19.How can you calculate the cumulative sum of a column in a DataFrame?**

In [16]:
df = pd.DataFrame({
    'Value': [10, 20, 30, 40]
})

# Calculating the cumulative sum of the 'Value' column
df['Cumulative Sum'] = df['Value'].cumsum()
df


Unnamed: 0,Value,Cumulative Sum
0,10,10
1,20,30
2,30,60
3,40,100


**20.How do you change the data type of a column in a DataFrame?**

In [17]:
df = pd.DataFrame({
    'Value': ['10', '20', '30', '40']  # Initially as strings
})

# Changing the data type of the 'Value' column to integer
df['Value'] = df['Value'].astype(int)
df

Unnamed: 0,Value
0,10
1,20
2,30
3,40


**21.How do you find the correlation between columns in a DataFrame?**

In [18]:
import pandas as pd

# Sample DataFrame
df = pd.DataFrame({
    'A': [1, 2, 3, 4],
    'B': [4, 3, 2, 1],
    'C': [1, 3, 5, 7]
})

# Calculating the correlation matrix
correlation_matrix = df.corr()
correlation_matrix

Unnamed: 0,A,B,C
A,1.0,-1.0,1.0
B,-1.0,1.0,-1.0
C,1.0,-1.0,1.0


**23.How do you filter rows of a DataFrame using multiple conditions?**

In [19]:
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Age': [25, 30, 35, 40],
    'City': ['New York', 'Los Angeles', 'New York', 'Chicago']
})

# Filtering rows with multiple conditions
filtered_df = df[(df['Age'] > 30) & (df['City'] == 'New York')]
filtered_df

Unnamed: 0,Name,Age,City
2,Charlie,35,New York


**24.How can you apply a custom function to each element in a DataFrame
column?**

In [20]:
df = pd.DataFrame({
    'Value': [1, 2, 3, 4]
})

# Define a custom function
def custom_function(x):
    return x ** 2  # Example: squaring the value

# Applying the custom function to the 'Value' column
df['Transformed'] = df['Value'].apply(custom_function)
df

Unnamed: 0,Value,Transformed
0,1,1
1,2,4
2,3,9
3,4,16


**25.How do you set a specific column as the index of a DataFrame?**

In [21]:
df = pd.DataFrame({
    'ID': [1, 2, 3],
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35]
})

# Setting the 'ID' column as the index
df.set_index('ID', inplace=True)
df

Unnamed: 0_level_0,Name,Age
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Alice,25
2,Bob,30
3,Charlie,35
