In [1]:
'''
pd.read_csv(): Reads a CSV file into a DataFrame.
df.head(): Returns the first n rows of the DataFrame.
df.describe(): Generates descriptive statistics of the DataFrame.
df.groupby(): Groups the DataFrame using a mapper or by a series of columns.
df.merge(): Merges DataFrame or named series objects with a database-style join.
'''

import pandas as pd

# Small three-row frame used to demonstrate each function listed above.
data = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}
df = pd.DataFrame(data)

# head(): only the first row.
first_row = df.head(1)
print(first_row)

# describe(): count/mean/std/min/quartiles/max for every column.
summary = df.describe()
print(summary)

# groupby(): every value of 'A' is distinct here, so each group is one row.
grouped = df.groupby('A').sum()
print('grouped', grouped)

# merge(): left join on 'A'; A=3 has no match in df2, so its 'D' is NaN.
df2 = pd.DataFrame({'A': [1, 2, 4], 'D': [10, 11, 12]})
merged_df = pd.merge(df, df2, how='left', on='A')
print(merged_df)


   A  B  C
0  1  4  7
         A    B    C
count  3.0  3.0  3.0
mean   2.0  5.0  8.0
std    1.0  1.0  1.0
min    1.0  4.0  7.0
25%    1.5  4.5  7.5
50%    2.0  5.0  8.0
75%    2.5  5.5  8.5
max    3.0  6.0  9.0
grouped    B  C
A      
1  4  7
2  5  8
3  6  9
   A  B  C     D
0  1  4  7  10.0
1  2  5  8  11.0
2  3  6  9   NaN


In [2]:
'''question 2'''
def reindex_dataframe(df):
    """Relabel the rows of ``df`` with the odd numbers 1, 3, 5, ...

    The index is replaced on ``df`` itself and the same frame is returned.
    """
    row_count = len(df)
    # One odd label per row: starts at 1, steps by 2, ends at 2*row_count - 1.
    odd_labels = range(1, 2 * row_count + 1, 2)
    df.index = odd_labels
    return df

# Demo: build a 3x3 frame and print it after its index has been replaced.
data = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}
df = pd.DataFrame(data)
reindexed = reindex_dataframe(df)
print(reindexed)


   A  B  C
1  1  4  7
3  2  5  8
5  3  6  9


In [3]:
'''Question 3'''
def sum_first_three(df):
    """Print the sum of the first three entries of the 'Values' column.

    The total is only printed; nothing is returned.
    """
    leading_values = df['Values'][:3]
    sum_values = leading_values.sum()
    print(f"The sum of the first three values is: {sum_values}")

# Demo: print a five-value frame, then the sum of its first three values.
values = [10, 20, 30, 40, 50]
data = {'Values': values}
df = pd.DataFrame(data)
print(df)
sum_first_three(df)


   Values
0      10
1      20
2      30
3      40
4      50
The sum of the first three values is: 60


In [24]:
'''question 4'''
def add_word_count(df):
    """Add a 'Word_Count' column with the number of words in each 'Text' entry.

    A word is a run of non-whitespace characters, so repeated, leading or
    trailing whitespace does not inflate the count and an empty string
    counts as 0 words.  Entries are converted with ``str()`` before being
    counted.

    The column is added to ``df`` itself and the same frame is returned.
    """
    # split() with no argument splits on any whitespace run and drops empty
    # pieces; split(" ") would report "" as one word and "a  b" as three.
    df['Word_Count'] = df['Text'].apply(lambda text: len(str(text).split()))
    return df

# Demo: print the frame before and after the word-count column is added.
sentences = ["Hello world", "Pandas is great", "Python programming"]
data = {'Text': sentences}
df = pd.DataFrame(data)
print(df)
counted = add_word_count(df)
print(counted)


                 Text
0         Hello world
1     Pandas is great
2  Python programming
                 Text  Word_Count
0         Hello world           2
1     Pandas is great           3
2  Python programming           2


In [5]:
'''Question 5'''
'''
DataFrame.size: Returns the number of elements in the DataFrame (i.e., number of rows * number of columns).
DataFrame.shape: Returns a tuple representing the dimensionality of the DataFrame (i.e., number of rows, number of columns).
'''

'\nDataFrame.size: Returns the number of elements in the DataFrame (i.e., number of rows * number of columns).\nDataFrame.shape: Returns a tuple representing the dimensionality of the DataFrame (i.e., number of rows, number of columns).\n'

In [12]:
'''question 6'''
'''
The function used to read an Excel file is pd.read_excel().
'''

'\nThe function used to read an Excel file is pd.read_excel().\n'

In [31]:
'''Question 7'''
def extract_username(df):
    """Add a 'Username' column with the part of each 'Email' before the first '@'.

    A string without '@' is kept whole.  Entries that are not strings
    (for example None/NaN for a missing address) are copied through
    unchanged instead of raising AttributeError.

    The column is added to ``df`` itself and the same frame is returned.
    """
    def local_part(email):
        # Only strings can be split; anything else is passed through as-is.
        if not isinstance(email, str):
            return email
        return email.split('@')[0]

    df['Username'] = df['Email'].apply(local_part)
    return df

# Demo: two addresses, printed together with their extracted usernames.
emails = ['john.doe@example.com', 'jane.smith@example.com']
data = {'Email': emails}
df = pd.DataFrame(data)
with_usernames = extract_username(df)
print(with_usernames)


                    Email    Username
0    john.doe@example.com    john.doe
1  jane.smith@example.com  jane.smith


In [61]:
'''question 8'''

def selectrows(df):
    """Return the rows of ``df`` where 'A' is greater than 5 and 'B' is less than 10."""
    a_above_five = df['A'] > 5
    b_below_ten = df['B'] < 10
    # Both conditions must hold for a row to be kept.
    keep = a_above_five & b_below_ten
    return df[keep]

# Demo: only the middle row (A=86, B=5) satisfies both conditions.
data = {'A': [8, 86, 3], 'B': [66, 5, 8], 'C': [7, 8, 9]}
df = pd.DataFrame(data)
matching_rows = selectrows(df)
print(matching_rows)

    A  B  C
1  86  5  8


In [60]:
'''question 9'''

def calculate_statistics(df):
    """Return ``(mean, median, standard deviation)`` of the 'Values' column.

    The standard deviation is whatever ``Series.std()`` computes with its
    default arguments.
    """
    values = df['Values']
    return values.mean(), values.median(), values.std()

# Demo: summarise five evenly spaced values.
data = {'Values': [10, 20, 30, 40, 50]}
df = pd.DataFrame(data)
stats = calculate_statistics(df)
mean, median, std_dev = stats
print(f"Mean: {mean}, Median: {median}, Standard Deviation: {std_dev}")


Mean: 30.0, Median: 30.0, Standard Deviation: 15.811388300841896


In [59]:
'''question 10'''

def add_moving_average(df, window=7):
    """Add a 'MovingAverage' column: the trailing rolling mean of 'Sales'.

    Parameters
    ----------
    df : DataFrame with a numeric 'Sales' column.
    window : int, default 7
        Number of consecutive rows (the current row and the ones before
        it) that are averaged.  The default reproduces the original
        seven-row average.

    Returns
    -------
    The same ``df`` object, with the new column added in place.

    Notes
    -----
    The window counts rows, not calendar days; the 'Date' column is not
    consulted.  NOTE(review): a "7-day" reading therefore assumes one row
    per day in chronological order -- confirm for real data.
    """
    # min_periods=1 lets the first rows average over however many values
    # exist so far instead of producing NaN until the window is full.
    df['MovingAverage'] = df['Sales'].rolling(window=window, min_periods=1).mean()
    return df

# Demo: fifteen daily sales figures, printed before and after the
# moving-average column is added.
daily_sales = [100, 1, 200, 4545, 300, 400, 355, 500, 545, 600, 700, 800, 900, 455, 1000]
data = {
    'Sales': daily_sales,
    'Date': pd.date_range(start='1/1/2023', periods=len(daily_sales)),
}
df = pd.DataFrame(data)
print(df)
with_average = add_moving_average(df)
print(with_average)


    Sales       Date
0     100 2023-01-01
1       1 2023-01-02
2     200 2023-01-03
3    4545 2023-01-04
4     300 2023-01-05
5     400 2023-01-06
6     355 2023-01-07
7     500 2023-01-08
8     545 2023-01-09
9     600 2023-01-10
10    700 2023-01-11
11    800 2023-01-12
12    900 2023-01-13
13    455 2023-01-14
14   1000 2023-01-15
    Sales       Date  MovingAverage
0     100 2023-01-01     100.000000
1       1 2023-01-02      50.500000
2     200 2023-01-03     100.333333
3    4545 2023-01-04    1211.500000
4     300 2023-01-05    1029.200000
5     400 2023-01-06     924.333333
6     355 2023-01-07     843.000000
7     500 2023-01-08     900.142857
8     545 2023-01-09     977.857143
9     600 2023-01-10    1035.000000
10    700 2023-01-11     485.714286
11    800 2023-01-12     557.142857
12    900 2023-01-13     628.571429
13    455 2023-01-14     642.857143
14   1000 2023-01-15     714.285714


In [54]:
'''question 11'''

def add_weekday(df):
    """Add a 'Weekday' column holding the day name of each 'Date' entry.

    'Date' is parsed with ``pd.to_datetime`` only to derive the name; the
    'Date' column itself is left as it was.  The new column is added to
    ``df`` in place and the same frame is returned.
    """
    parsed_dates = pd.to_datetime(df['Date'])
    weekday_names = parsed_dates.dt.day_name()
    df['Weekday'] = weekday_names
    return df

# Demo: five consecutive dates, printed with their weekday names.
date_strings = ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']
data = {'Date': date_strings}
df = pd.DataFrame(data)
with_weekdays = add_weekday(df)
print(with_weekdays)


         Date    Weekday
0  2023-01-01     Sunday
1  2023-01-02     Monday
2  2023-01-03    Tuesday
3  2023-01-04  Wednesday
4  2023-01-05   Thursday


In [55]:
'''question 12'''

def select_date_range(df, start='2023-01-01', end='2023-01-31'):
    """Return the rows of ``df`` whose 'Date' lies between ``start`` and ``end``.

    Both bounds are inclusive.  ``end`` is treated as a whole calendar day,
    so a row timestamped at any time of day on ``end`` is kept.  The
    defaults select January 2023.

    Parameters
    ----------
    df : DataFrame with a 'Date' column that ``pd.to_datetime`` can parse.
    start, end : values comparable with a datetime column (for example
        'YYYY-MM-DD' strings) giving the first and last day to keep.

    Returns
    -------
    A new DataFrame containing the matching rows, with 'Date' converted to
    datetimes.  The caller's ``df`` is not modified.
    """
    dates = pd.to_datetime(df['Date'])
    # Compare midnight-normalised dates against ``end`` so that timestamps
    # later in the final day are not cut off by an implicit 00:00 bound.
    in_range = (dates >= start) & (dates.dt.normalize() <= end)
    # assign() builds a new frame, so the input keeps its original column.
    return df.assign(Date=dates)[in_range]

# Demo: four dates, one of which (2023-02-01) falls outside January 2023.
date_strings = ['2023-01-01', '2023-02-01', '2023-01-15', '2023-01-25']
data = {'Date': date_strings}
df = pd.DataFrame(data)
january_rows = select_date_range(df)
print(january_rows)


        Date
0 2023-01-01
2 2023-01-15
3 2023-01-25


In [58]:
'''question 13'''

'''
The first library that needs to be imported is pandas:
'''
import pandas as pd