In [9]:
#Ans1:
#a.read_csv():Reads a CSV file into a DataFrame.
import pandas as pd
# Load 'file.csv' (path is relative to the current working directory)
# and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='file.csv')
print(df.head(n=5))


# b. groupby(): split the rows into groups by key, then aggregate each group.
df = pd.DataFrame(
    {'Category': ['A', 'A', 'B', 'B'], 'Value': [10, 20, 30, 40]}
)
# Sum 'Value' within each 'Category'; the category labels become the index.
grouped = df.groupby(by='Category').sum()
print(grouped)


# c. merge(): join two DataFrames on a shared key column.
df1 = pd.DataFrame({'ID': [1, 2, 3], 'Name': ['Alice', 'Bob', 'Charlie']})
df2 = pd.DataFrame({'ID': [1, 2, 4], 'Score': [85, 90, 95]})
# An inner join keeps only the IDs present in both frames (here 1 and 2).
merged_df = pd.merge(left=df1, right=df2, how='inner', on='ID')
print(merged_df)


# d. pivot_table(): reshape long data into a summary grid.
df = pd.DataFrame(
    {
        'Date': ['2024-01-01', '2024-01-01', '2024-01-02', '2024-01-02'],
        'Category': ['A', 'B', 'A', 'B'],
        'Sales': [100, 150, 200, 250],
    }
)
# One row per date, one column per category, each cell holding summed sales.
pivot_table = df.pivot_table(
    index='Date',
    columns='Category',
    values='Sales',
    aggfunc='sum',
)
print(pivot_table)

In [13]:
#Ans2:
import pandas as pd
def reindex_dataframe(df):
    """Return a copy of ``df`` labelled with the odd numbers 1, 3, 5, ...

    Args:
        df: The DataFrame to relabel. It is copied, not modified.

    Returns:
        A new DataFrame with the same data whose index starts at 1 and
        increases by 2 for each row.
    """
    row_count = len(df)
    relabelled = df.copy()
    # One odd label per row: 1, 3, ..., 2 * row_count - 1.
    relabelled.index = range(1, 2 * row_count + 1, 2)
    return relabelled
# Build a small three-column frame and show it before and after re-indexing.
data = dict(A=[10, 20, 30], B=[40, 50, 60], C=[70, 80, 90])
df = pd.DataFrame(data)
print("Original DataFrame:", df, sep="\n")
df_reindexed = reindex_dataframe(df)
print("\nRe-indexed DataFrame:", df_reindexed, sep="\n")


Original DataFrame:
    A   B   C
0  10  40  70
1  20  50  80
2  30  60  90

Re-indexed DataFrame:
    A   B   C
1  10  40  70
3  20  50  80
5  30  60  90


In [21]:
#Ans3:
def sum_first_three_values(df):
    """Print the sum of the first three entries of the 'Values' column.

    If the column holds fewer than three rows, all available rows are summed.

    Args:
        df: A DataFrame that has a 'Values' column.

    Returns:
        None. The result is written to standard output only.
    """
    total = df['Values'].head(3).sum()
    print("Sum of the first three values:", total)
# Five sample values; only the first three (10 + 20 + 30) go into the printed sum.
data = dict(Values=[10, 20, 30, 40, 50])
df = pd.DataFrame(data)

sum_first_three_values(df)


Sum of the first three values: 60


In [23]:
#Ans4:
def add_word_count_column(df):
    """Add a 'Word_Count' column holding the number of words in 'Text'.

    Words are whitespace-separated tokens. Missing, empty, and
    whitespace-only entries count as zero words.

    Args:
        df: A DataFrame with a 'Text' column. It is modified in place.

    Returns:
        The same DataFrame object, now carrying the 'Word_Count' column.
    """
    def words_in(entry):
        # Treat missing values and blank strings as having no words.
        is_blank = pd.isna(entry) or not entry.strip()
        return 0 if is_blank else len(entry.split())

    word_counts = df['Text'].apply(words_in)
    df['Word_Count'] = word_counts
    return df
# Sample sentences, including an empty string to show the zero-word case.
sample_texts = (
    'Hello world',
    'Pandas is great for data manipulation',
    'This is a sample text',
    '',
)
data = {'Text': list(sample_texts)}
del sample_texts  # keep the notebook namespace the same as before
df = pd.DataFrame(data)
df_with_word_count = add_word_count_column(df)
print(df_with_word_count)


                                    Text  Word_Count
0                            Hello world           2
1  Pandas is great for data manipulation           6
2                  This is a sample text           5
3                                                  0


In [None]:
#Ans5:
#a. DataFrame.size: Returns the total number of elements in the DataFrame (rows x columns) as an integer.
#b. DataFrame.shape: Returns a tuple (number of rows, number of columns) representing the dimensions of the DataFrame.

In [None]:
#Ans6:To read an Excel file into a Pandas DataFrame, you use the pandas.read_excel() function. This function can read data from Excel files with .xls and .xlsx extensions.

In [29]:
#Ans7:
def extract_username(df):
    """Add a 'Username' column holding the part of 'Email' before the '@'.

    Missing emails yield None. A string that contains no '@' is copied to
    'Username' unchanged, because there is nothing to cut off.

    Args:
        df: A DataFrame with an 'Email' column. It is modified in place.

    Returns:
        The same DataFrame object, now carrying the 'Username' column.
    """
    def local_part(address):
        # Everything before the first '@'; the whole string if there is none.
        return None if pd.isna(address) else address.partition('@')[0]

    usernames = df['Email'].apply(local_part)
    df['Username'] = usernames
    return df
# One well-formed address plus two strings without an '@', to show both outcomes.
data = dict(
    Email=[
        'shubhankar123@gmail.com',
        'shubhp456.com',
        'shubhankar123.ac.in',
    ]
)
df = pd.DataFrame(data)
df_username = extract_username(df)
print(df_username)


                     Email             Username
0  shubhankar123@gmail.com        shubhankar123
1            shubhp456.com        shubhp456.com
2      shubhankar123.ac.in  shubhankar123.ac.in


In [31]:
#Ans8:
def filter_rows(df):
    """Keep the rows where column 'A' exceeds 5 and column 'B' is below 10.

    Args:
        df: A DataFrame with numeric columns 'A' and 'B'.

    Returns:
        A DataFrame containing only the matching rows, with their original
        index labels.
    """
    a_above_five = df['A'] > 5
    b_below_ten = df['B'] < 10
    return df[a_above_five & b_below_ten]
# Rows 1, 2 and 4 satisfy both A > 5 and B < 10.
data = dict(
    A=[3, 8, 6, 2, 9],
    B=[5, 2, 9, 3, 1],
    C=[1, 7, 4, 5, 2],
)
df = pd.DataFrame(data)
filtered_df = filter_rows(df)
print(filtered_df)

   A  B  C
1  8  2  7
2  6  9  4
4  9  1  2


In [35]:
#Ans9:
def calculate_statistics(df):
    """Compute the mean, median, and standard deviation of the 'Values' column.

    Args:
        df: A DataFrame that must contain a 'Values' column.

    Returns:
        A dict with the keys 'Mean', 'Median', and 'Standard Deviation'.

    Raises:
        ValueError: If ``df`` has no 'Values' column.
    """
    if 'Values' not in df.columns:
        raise ValueError("The DataFrame must contain a column named 'Values'")

    values = df['Values']
    summary = {}
    summary['Mean'] = values.mean()
    summary['Median'] = values.median()
    summary['Standard Deviation'] = values.std()
    return summary
# Five evenly spaced values, so the mean and the median are both 30.
data = dict(Values=[10, 20, 30, 40, 50])
df = pd.DataFrame(data)
statistics = calculate_statistics(df)
print(statistics)
# Bare expression: the notebook renders the frame as this cell's output.
df

{'Mean': 30.0, 'Median': 30.0, 'Standard Deviation': 15.811388300841896}


Unnamed: 0,Values
0,10
1,20
2,30
3,40
4,50


In [37]:
#Ans10:
import pandas as pd

def add_moving_average(df, window=7):
    """Return a date-sorted copy of ``df`` with a 'MovingAverage' column.

    The input frame is left untouched: date parsing, sorting, and the new
    column are all applied to a new DataFrame. (Previously the caller's
    'Date' column was converted in place while 'MovingAverage' was added
    only to the returned copy, leaving the input half-modified.)

    Args:
        df: A DataFrame with 'Date' and 'Sales' columns. 'Date' may hold
            anything ``pd.to_datetime`` can parse.
        window: Number of consecutive rows to average over. Defaults to 7.
            The window counts rows, not calendar days, so it equals a 7-day
            average only when there is exactly one row per day.

    Returns:
        A new DataFrame sorted by 'Date', with 'Date' as datetimes and a
        'MovingAverage' column. The first ``window - 1`` rows average over
        however many rows are available so far.

    Raises:
        ValueError: If ``df`` lacks a 'Sales' or 'Date' column.
    """
    if 'Sales' not in df.columns or 'Date' not in df.columns:
        raise ValueError("The DataFrame must contain 'Sales' and 'Date' columns")

    # assign() builds a new frame, so the caller's 'Date' column keeps its
    # original values and dtype.
    result = df.assign(Date=pd.to_datetime(df['Date']))

    # Sort by date so the rolling window runs in chronological order.
    result = result.sort_values(by='Date')
    result['MovingAverage'] = (
        result['Sales'].rolling(window=window, min_periods=1).mean()
    )
    return result
# Ten consecutive days of sales; from the seventh row on, the window is full.
data = dict(
    Date=[
        '2024-08-01', '2024-08-02', '2024-08-03', '2024-08-04', '2024-08-05',
        '2024-08-06', '2024-08-07', '2024-08-08', '2024-08-09', '2024-08-10',
    ],
    Sales=[200, 220, 250, 210, 230, 240, 250, 260, 270, 280],
)
df = pd.DataFrame(data)
df_with_moving_avg = add_moving_average(df)
print(df_with_moving_avg)


        Date  Sales  MovingAverage
0 2024-08-01    200     200.000000
1 2024-08-02    220     210.000000
2 2024-08-03    250     223.333333
3 2024-08-04    210     220.000000
4 2024-08-05    230     222.000000
5 2024-08-06    240     225.000000
6 2024-08-07    250     228.571429
7 2024-08-08    260     237.142857
8 2024-08-09    270     244.285714
9 2024-08-10    280     248.571429


In [41]:
#Ans11:
def add_weekday_column(df):
    """Add a 'Weekday' column with the day name of each 'Date' entry.

    The 'Date' column is converted to datetimes as part of the call.

    Args:
        df: A DataFrame with a 'Date' column that ``pd.to_datetime`` can
            parse. It is modified in place.

    Returns:
        The same DataFrame object, with 'Date' as datetimes and a new
        'Weekday' column (e.g. 'Monday').

    Raises:
        ValueError: If ``df`` has no 'Date' column.
    """
    if 'Date' not in df.columns:
        raise ValueError("The DataFrame must contain a column named 'Date'")

    parsed_dates = pd.to_datetime(df['Date'])
    df['Date'] = parsed_dates
    df['Weekday'] = parsed_dates.dt.day_name()
    return df
# Five consecutive days starting on Sunday, 1 January 2023.
data = dict(
    Date=['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']
)
df = pd.DataFrame(data)
df_with_weekday = add_weekday_column(df)
print(df_with_weekday)


        Date    Weekday
0 2023-01-01     Sunday
1 2023-01-02     Monday
2 2023-01-03    Tuesday
3 2023-01-04  Wednesday
4 2023-01-05   Thursday


In [43]:
#Ans12:
def filter_by_date_range(df, start_date, end_date):
    """Return the rows whose 'Date' lies within ``[start_date, end_date]``.

    Both bounds are inclusive. The input frame is not modified: dates are
    parsed into a separate Series instead of overwriting the caller's 'Date'
    column, which the previous version did as a hidden side effect.

    Args:
        df: A DataFrame with a 'Date' column that ``pd.to_datetime`` can parse.
        start_date: Lower bound; anything ``pd.to_datetime`` accepts.
        end_date: Upper bound; anything ``pd.to_datetime`` accepts.

    Returns:
        A new DataFrame holding the matching rows with their original index
        labels and with 'Date' converted to datetimes.

    Raises:
        ValueError: If ``df`` has no 'Date' column.
    """
    if 'Date' not in df.columns:
        raise ValueError("The DataFrame must contain a column named 'Date'")

    parsed_dates = pd.to_datetime(df['Date'])
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)
    in_range = (parsed_dates >= start_date) & (parsed_dates <= end_date)

    # assign() returns a new frame, so the parsed dates appear in the result
    # only and never in the caller's DataFrame.
    filtered_df = df.assign(Date=parsed_dates)[in_range]

    return filtered_df
# Unsorted sample dates; only the 2023-02-01 row falls outside January 2023.
data = dict(
    Date=['2023-01-01', '2023-01-10', '2023-01-20', '2023-02-01', '2023-01-15'],
    Value=[100, 200, 300, 400, 500],
)
df = pd.DataFrame(data)
filtered_df = filter_by_date_range(df, '2023-01-01', '2023-01-31')
print(filtered_df)


        Date  Value
0 2023-01-01    100
1 2023-01-10    200
2 2023-01-20    300
4 2023-01-15    500


In [45]:
#Ans13:
import pandas as pd
# pandas is the first library that must be imported in Python to use basic pandas functions.