In [None]:
#You can create a Series from a list, NumPy array, or dictionary.

import pandas as pd

# Wrap a plain Python list in a Series; no index is passed, so pandas
# supplies the default integer index.
data = [10, 20, 30, 40]
series = pd.Series(data=data)
print(series)

0    10
1    20
2    30
3    40
dtype: int64


In [None]:
# Individual elements of a Series can be read by index label (series[label] or
# .loc[label]) or by position (.iloc[position]).
# With an integer index, series[2] is a *label* lookup that only happens to
# coincide with the position, so .iloc is used to make positional access explicit.

print(series.iloc[2]) # Access by position

30


In [None]:
#A DataFrame can be created from various sources, such as dictionaries, lists of lists, or NumPy arrays.

# Each dictionary key becomes a column label; each list holds that column's values.
names = ['Alice', 'Bob', 'Charlie']
ages = [25, 30, 35]
data = {'Name': names, 'Age': ages}
df = pd.DataFrame(data=data)
print(df)

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35


In [None]:
# Rows, columns, and single cells of a DataFrame can be reached with direct
# column access, .iloc[] (by position) and .loc[] (by label).

name_column = df['Name']       # Accessing a column by name
second_row = df.iloc[1]        # Accessing a row by position
cell_value = df.loc[1, 'Name'] # Accessing specific cell by row and column labels

print(name_column)
print(second_row)
print(cell_value)

0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object
Name    Bob
Age      30
Name: 1, dtype: object
Bob


In [None]:
# Small two-column frame used to contrast label-based and position-based row access.
df = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': [4, 5, 6],
    }
)
row_by_label = df.loc[0]      # Access by label (row index)
row_by_position = df.iloc[1]  # Access by position (row index)

print(row_by_label)
print("Japan")
print(row_by_position)

A    1
B    4
Name: 0, dtype: int64
Japan
A    2
B    5
Name: 1, dtype: int64


In [None]:
# Adding columns and rows: new columns can be added by direct assignment or
# with methods such as insert().
# Removing data: use .drop() to remove rows or columns.

df['C'] = [7, 8, 9] # Adding a column

In [None]:
# Grouping in pandas is done with .groupby(): rows are split into groups by one
# or more columns, and an aggregation such as sum(), mean(), or count() is then
# applied to each group.
df = pd.DataFrame(
    [('A', 10), ('A', 15), ('B', 20), ('B', 25)],
    columns=['Category', 'Value'],
)
by_category = df.groupby('Category')
grouped = by_category.sum()  # one row per category, values summed
print(grouped)

          Value
Category       
A            25
B            45


In [None]:
# Aggregating data
# Aggregate functions (e.g. sum, mean, min, max) can be applied across the rows
# or columns of a DataFrame using .aggregate() (alias: .agg()).

df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
column_totals = df.agg('sum')  # Apply sum function to all columns
print(column_totals)

A     6
B    15
dtype: int64


In [None]:
#Detecting Missing Data
#Pandas provides the .isnull() and .notnull() functions to identify missing values in a DataFrame or Series.

import numpy as np

# np.nan marks the missing entries: one in column A and one in column B.
df = pd.DataFrame(
    {
        'A': [1, np.nan, 3],
        'B': [4, 5, np.nan],
    }
)
missing_mask = df.isnull()  # Detect missing data: True wherever a value is NaN
print(missing_mask)

       A      B
0  False  False
1   True  False
2  False   True


In [None]:
#Cleaning and Filling Missing Data
'''Missing data can be handled by either removing rows/columns containing NaN values using .dropna()
or filling the missing values using .fillna().'''

# Replace every NaN with 0 and rebind df to the filled result.
df = df.fillna(value=0) # Filling missing data with 0
print(df)

     A    B
0  1.0  4.0
1  0.0  5.0
2  3.0  0.0


**Working with Time Series Data**

In [None]:
'''Time series data in Pandas is indexed by date and time. A DatetimeIndex can be
generated with pd.date_range() or parsed from existing values with pd.to_datetime().'''

# Six consecutive dates starting 2023-01-01 (date_range defaults to daily frequency).
dates = pd.date_range('20230101', periods=6)

# A seeded generator makes the "random" values identical on every
# Restart & Run All, so the displayed table stays reproducible.
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list('ABCD'))
print(df)

                   A         B         C         D
2023-01-01  0.334946 -1.724745 -1.884682 -0.240324
2023-01-02 -0.018926  0.329640  3.079282  1.232676
2023-01-03 -0.813644 -0.856727  0.159937 -0.158352
2023-01-04 -0.020376  0.028918  0.254747  0.202676
2023-01-05 -1.832331  0.264201  1.047731 -0.461809
2023-01-06 -0.362763 -0.117930 -0.052385  1.090213


In [None]:
#Resampling and Shifting Time Series Data
'''You can resample data to a different frequency (e.g., daily, monthly) using .resample() and perform operations
like aggregation on the resampled data.'''

# Bucket the rows into calendar-day bins, then average the rows in each bin.
daily_bins = df.resample('D')
df_resampled = daily_bins.mean() # Daily resampling
print(df_resampled)

                   A         B         C         D
2023-01-01  0.334946 -1.724745 -1.884682 -0.240324
2023-01-02 -0.018926  0.329640  3.079282  1.232676
2023-01-03 -0.813644 -0.856727  0.159937 -0.158352
2023-01-04 -0.020376  0.028918  0.254747  0.202676
2023-01-05 -1.832331  0.264201  1.047731 -0.461809
2023-01-06 -0.362763 -0.117930 -0.052385  1.090213


In [None]:
#Merging and Joining Data
'''Merging DataFrames in Pandas is done using the .merge() method. This is similar to SQL joins and allows you to
combine DataFrames based on common columns.'''

# Join keys are compared exactly, so they must not carry stray whitespace
# (a key such as ' B' would never match 'B' in another frame).
df1 = pd.DataFrame({'key': ['A', 'B'], 'value': [1, 2]})
df2 = pd.DataFrame({'key': ['A', 'C'], 'value': [3, 4]})

# Inner join: only keys present in both frames survive ('A' here). Both frames
# have a 'value' column, so pandas disambiguates them with the default
# '_x' / '_y' suffixes.
merged = pd.merge(df1, df2, on='key', how='inner')
print(merged)

  key  value_x  value_y
0   A        1        3


In [None]:
# Concatenating DataFrames
# Several DataFrames can be stacked along rows or columns with pd.concat().

df1 = pd.DataFrame({'A': [1, 3], 'B': [2, 4]})
df2 = pd.DataFrame({'A': [5, 7], 'B': [6, 8]})
frames = [df1, df2]
# Concatenate along rows; each frame keeps its own row labels, so 0 and 1 repeat.
result = pd.concat(frames, axis='index')
print(result)

   A  B
0  1  2
1  3  4
0  5  6
1  7  8


**Exercise**

In [None]:
'''Problem 1: Create a Pandas Series from the list [5,10,15,20,25]. Access the
third element of the series.'''

a = [5, 10, 15, 20, 25]
series = pd.Series(a)
# "Third element" is a positional request, so use .iloc (position 2, zero-based).
# Plain series[2] would be a label lookup that only matches because the default
# index happens to be 0..n-1.
print(series.iloc[2])

15


In [None]:
'''Problem 2: Create a DataFrame with the following data:

Data =

    Name      Age   City
    Alice     23    New York
    Bob       30    Los Angeles
    Charlie   28    Chicago

Access the City column of the DataFrame.
'''

# One dictionary entry per column of the table in the problem statement.
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [23, 30, 28],
    'City': ['New York', 'Los Angeles', 'Chicago'],
}
df = pd.DataFrame(data=data)
city_column = df['City']  # selecting a single column returns a Series
print(city_column)

0       New York
1    Los Angeles
2        Chicago
Name: City, dtype: object


In [None]:
'''Problem 3: Create a DataFrame with Name, Age, and Salary columns. Insert
a new column Gender with values ['M', 'F', 'M'].
'''

# The problem statement gives no salary figures; the values below are
# illustrative placeholders.
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [23, 30, 28],
    'Salary': [50000, 60000, 55000],
})

# Direct assignment appends the new column after the existing ones.
df['Gender'] = ['M', 'F', 'M']

print(df)

      Name  Age         City Gender
0    Alice   23     New York      M
1      Bob   30  Los Angeles      F
2  Charlie   28      Chicago      M


In [None]:
'''Problem 4: Remove the Age column from the DataFrame created in Problem 3.
'''

# Bind the result to a new name rather than overwriting df: with df rebound,
# running this cell a second time would raise KeyError because 'Age' would
# already be gone.
df_without_age = df.drop(columns=['Age'])
print(df_without_age)

      Name         City Gender
0    Alice     New York      M
1      Bob  Los Angeles      F
2  Charlie      Chicago      M


In [None]:
'''Problem 5: Create a Series with values [1,2,3] and index labels ['A','B','C'].
Retrieve the value of label 'B'.
'''

a = [1, 2, 3]
labels = ['A', 'B', 'C']
series = pd.Series(data=a, index=labels)

# Label-based lookup of the entry stored under 'B'.
value_at_b = series.loc['B']
print(value_at_b)


2


In [None]:
'''Problem 6: Create a DataFrame with X and Y columns and 5 rows of random
integers between 1 and 100. Find the sum of all values in column X.'''

# A seeded generator keeps the "random" table and its sum identical on every
# Restart & Run All.
rng = np.random.default_rng(42)

# integers() excludes the upper bound, so 101 yields values from 1 to 100 inclusive.
df = pd.DataFrame(rng.integers(1, 101, size=(5, 2)), columns=['X', 'Y'])
print(df)

total_x = df['X'].sum()  # sum of all values in column X
print(total_x)

    X   Y
0  61   6
1  85   3
2  67  55
3   5  65
4  92   3
310


In [None]:
'''Problem 7: Create a DataFrame and filter out rows where the Age column has
missing values.
'''

