<a href="https://colab.research.google.com/github/princypatel1712/Machine-learning-model/blob/main/Numpy_Pandas_Methods_and_Exercises.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# NumPy and Pandas Methods with Explanations

In [None]:
import numpy as np
import pandas as pd

## NumPy Methods

### np.array()
Creates a NumPy array from a Python list or tuple.

In [None]:
# Example 1: a flat sequence becomes a 1-D array (a tuple works just like a list)
arr = np.array((1, 2, 3))
print(arr)

# Example 2: nested sequences become a 2-D array, one inner sequence per row
arr2d = np.array(((1, 2), (3, 4)))
print(arr2d)


[1 2 3]
[[1 2]
 [3 4]]


### np.zeros()
Creates an array filled with zeros.

In [None]:
# Example 1: an integer shape gives a 1-D array of that length (values are floats)
zeros_1d = np.zeros(shape=5)
print(zeros_1d)

# Example 2: a tuple shape gives (rows, columns) -> a 3x4 array of zeros
zeros_2d = np.zeros(shape=(3, 4))
print(zeros_2d)


[0. 0. 0. 0. 0.]
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


### np.ones()
Creates an array filled with ones.

In [None]:
# Example 1: an integer shape gives a 1-D array of that length (values are floats)
ones_1d = np.ones(shape=5)
print(ones_1d)

# Example 2: a tuple shape gives (rows, columns) -> a 2x3 array of ones
ones_2d = np.ones(shape=(2, 3))
print(ones_2d)


[1. 1. 1. 1. 1.]
[[1. 1. 1.]
 [1. 1. 1.]]


### np.arange()
Creates an array of evenly spaced values in the half-open interval [start, stop): the start value is included and the stop value is excluded.

In [None]:
# Example 1: a single argument is the (excluded) stop value; counting starts at 0 in steps of 1
arr = np.arange(10)
print(arr)

# Example 2: start, stop and an explicit step -> every second value below 10
arr_step = np.arange(0, 10, step=2)
print(arr_step)


[0 1 2 3 4 5 6 7 8 9]
[0 2 4 6 8]


### np.linspace()
Creates an array of a given number of evenly spaced values between two bounds; both endpoints are included by default.

In [None]:
# Example 1: num=5 points from 0 to 1, endpoints included
lin = np.linspace(0, 1, num=5)
print(lin)

# Example 2: num=10 points from -1 to 1, endpoints included
lin_negative = np.linspace(-1, 1, num=10)
print(lin_negative)


[0.   0.25 0.5  0.75 1.  ]
[-1.         -0.77777778 -0.55555556 -0.33333333 -0.11111111  0.11111111
  0.33333333  0.55555556  0.77777778  1.        ]


### np.reshape()
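Changes the shape of an array without altering its data. One dimension may be given as -1, in which case its size is inferred from the number of elements and the other dimensions.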

In [None]:
# Example 1: lay the 6 values 0..5 out as 2 rows of 3
arr = np.reshape(np.arange(6), (2, 3))
print(arr)

# Example 2: -1 lets NumPy infer that dimension (12 values / 3 rows -> 4 columns)
arr2 = np.reshape(np.arange(12), (3, -1))
print(arr2)


[[0 1 2]
 [3 4 5]]
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


### np.transpose()
Reverses or permutes the axes of an array.

In [None]:
# Example 1: transposing a 2x3 array swaps rows and columns, giving 3x2
arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
transposed = np.transpose(arr)
print(transposed)

# Example 2: for a 3-D array, the axes argument gives the new axis order;
# (1, 0, 2) swaps the first two axes and leaves the last one in place
arr_3d = np.reshape(np.arange(8), (2, 2, 2))
transposed_3d = np.transpose(arr_3d, (1, 0, 2))
print(transposed_3d)


[[1 4]
 [2 5]
 [3 6]]
[[[0 1]
  [4 5]]

 [[2 3]
  [6 7]]]


### np.dot()
Computes the dot product of two arrays: the inner product for 1-D arrays and matrix multiplication for 2-D arrays.

In [None]:
# Example 1: for 1-D inputs np.dot is the inner product -> 1*3 + 2*4
a, b = np.array([1, 2]), np.array([3, 4])
result = np.dot(a, b)
print(result)

# Example 2: for 2-D inputs np.dot is matrix multiplication
# (each output entry is a row of `a` times a column of `b`)
a = np.array([[1, 2],
              [3, 4]])
b = np.array([[5, 6],
              [7, 8]])
result = np.dot(a, b)
print(result)


11
[[19 22]
 [43 50]]


### np.sum()
Calculates the sum of array elements over a specified axis.

In [None]:
# Example 1: with no axis, every element is added into one scalar
arr = np.array([1, 2, 3])
total = np.sum(arr)
print(total)

# Example 2: axis=0 adds down the rows, leaving one total per column
arr2d = np.array([[1, 2], [3, 4]])
column_totals = np.sum(arr2d, axis=0)
print(column_totals)


6
[4 6]


### np.mean()
Computes the mean of the array elements.

In [None]:
# Example 1: with no axis, the mean is taken over every element
arr = np.array([1, 2, 3, 4])
overall_mean = np.mean(arr)
print(overall_mean)

# Example 2: axis=1 averages across the columns, leaving one mean per row
arr2d = np.array([[1, 2], [3, 4]])
row_means = np.mean(arr2d, axis=1)
print(row_means)


2.5
[1.5 3.5]


### np.std()
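Computes the standard deviation of the array elements. By default this is the population standard deviation (`ddof=0`); pass `ddof=1` for the sample standard deviation.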

In [None]:
# Example 1: with no axis, the spread is measured over every element
arr = np.array([1, 2, 3, 4])
overall_std = np.std(arr)
print(overall_std)

# Example 2: axis=0 measures the spread down the rows, one value per column
arr2d = np.array([[1, 2], [3, 4]])
column_std = np.std(arr2d, axis=0)
print(column_std)


1.118033988749895
[1. 1.]


### np.min()
Finds the minimum value in the array.

In [None]:
# Example 1: with no axis, the smallest of all elements is returned
arr = np.array([3, 1, 4, 2])
print(np.min(arr))

# Example 2: axis=1 looks across the columns, giving the smallest value in each row
arr_2d = np.array([
    [3, 1],
    [5, 2],
])
print(np.min(arr_2d, axis=1))


1
[1 2]


### np.max()
Finds the maximum value in the array.

In [None]:
# Example 1: with no axis, the largest of all elements is returned
arr = np.array([3, 1, 4, 2])
print(np.max(arr))

# Example 2: axis=0 looks down the rows, giving the largest value in each column
arr_2d = np.array([
    [3, 1],
    [5, 2],
])
print(np.max(arr_2d, axis=0))


4
[5 2]


### np.argmax()
Returns the indices of the maximum values along an axis.

In [None]:
# Example 1: position (not value) of the largest element in a 1-D array
arr = np.array([3, 1, 4, 2])
print(np.argmax(arr))

# Example 2: axis=0 gives, for each column, the row index holding its largest value
arr_2d = np.array([
    [3, 1],
    [5, 2],
])
print(np.argmax(arr_2d, axis=0))


2
[1 1]


### np.where()
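Called with only a condition, returns the indices where the condition is true (as a tuple of arrays, one per dimension). Called with a condition and two alternatives, returns elements taken from the first alternative where the condition holds and from the second elsewhere.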

In [None]:
# Example 1: condition only -> a tuple holding the positions where it is true
arr = np.array([1, 2, 3, 4])
above_two = arr > 2
indices = np.where(above_two)
print(indices)

# Example 2: condition plus two alternatives -> 100 where true, the original value elsewhere
arr = np.array([1, 2, 3, 4])
above_two = arr > 2
new_arr = np.where(above_two, 100, arr)
print(new_arr)


(array([2, 3]),)
[  1   2 100 100]


## Pandas Methods

### pd.read_csv()
Reads a CSV file into a DataFrame.

In [None]:
# Create a small sample file first so the examples below run anywhere;
# without it, pd.read_csv raises FileNotFoundError for the missing 'data.csv'.
sample = pd.DataFrame({
    'Column1': [1, 2, 3],
    'Column2': ['a', 'b', 'c'],
    'Column3': [0.1, 0.2, 0.3],
})
sample.to_csv('data.csv', index=False)  # index=False: do not write the row labels as a column

# Example 1: Load a CSV file (all three columns)
df = pd.read_csv('data.csv')
print(df.head())

# Example 2: Load with specific columns (Column3 is skipped)
df = pd.read_csv('data.csv', usecols=['Column1', 'Column2'])
print(df)

### pd.DataFrame()
The DataFrame is a 2-dimensional, tabular data structure in pandas that is similar to a spreadsheet or SQL table. It has labeled axes (rows and columns).

`pd.DataFrame(data, index=None, columns=None, dtype=None)`

- `data`: Data to create the DataFrame (e.g., dictionary, list of lists, NumPy array, etc.).
- `index`: Row labels.
- `columns`: Column labels.
- `dtype`: Data type to force on the columns; inferred from the data when `None`.

In [None]:
# Example 1: a dict maps each column name to that column's values
df = pd.DataFrame(data={'A': [1, 2], 'B': [3, 4]})
print(df)

# Example 2: a list of lists supplies the rows; column names are passed separately
df2 = pd.DataFrame(data=[[1, 2], [3, 4]], columns=['A', 'B'])
print(df2)


   A  B
0  1  3
1  2  4
   A  B
0  1  2
1  3  4


### pd.Series()
The Series is a 1-dimensional labeled array capable of holding any data type. It can be thought of as a column in a DataFrame or a single array with row labels.

Key Features:

- Data is stored in a single column.
- It has an associated index to access elements.
- Each element can be accessed using its position or index label.

In [None]:
# Example 1: a list gets the default integer index 0, 1, 2, ...
s = pd.Series(data=[1, 2, 3])
print(s)

# Example 2: with a dict, the keys become the index labels
s2 = pd.Series(data={'a': 1, 'b': 2})
print(s2)


0    1
1    2
2    3
dtype: int64
a    1
b    2
dtype: int64


### .head()
Returns the first n rows of the DataFrame.

In [None]:
# Example 1: with no argument, the first 5 rows are returned
df = pd.DataFrame(data={'A': range(10)})
print(df.head())

# Example 2: n picks how many leading rows to return
print(df.head(n=3))


   A
0  0
1  1
2  2
3  3
4  4
   A
0  0
1  1
2  2


### .tail()
Returns the last n rows of the DataFrame.

In [None]:
# NOTE: reuses the 10-row `df` built in the .head() cell above.
# Example 1: with no argument, the last 5 rows are returned
print(df.tail())

# Example 2: n picks how many trailing rows to return
print(df.tail(n=2))


   A
5  5
6  6
7  7
8  8
9  9
   A
8  8
9  9


### .info()
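Prints a concise summary of the DataFrame: index range, column names, non-null counts, dtypes, and memory usage. The method prints the summary itself and returns `None`.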

In [None]:
# df.info() writes the summary to standard output itself and returns None,
# so it is called directly; wrapping it in print() would add a stray "None" line.

# Example 1: Basic info
df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
df.info()

# Example 2: Info with null values (column A reports one fewer non-null entry)
df = pd.DataFrame({'A': [1, None], 'B': [3, 4]})
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   A       2 non-null      int64
 1   B       2 non-null      int64
dtypes: int64(2)
memory usage: 160.0 bytes
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A       1 non-null      float64
 1   B       2 non-null      int64  
dtypes: float64(1), int64(1)
memory usage: 160.0 bytes
None


### .describe()
Generates descriptive statistics.

In [None]:
# NOTE: `df` is the frame left over from the .info() cell above
# (column A holds one missing value, so its count is 1).

# Example 1: the default summary covers the numeric columns
numeric_summary = df.describe()
print(numeric_summary)

# Example 2: include='all' also summarises non-numeric columns; this frame has
# none, so the table is the same as in Example 1
full_summary = df.describe(include='all')
print(full_summary)


         A         B
count  1.0  2.000000
mean   1.0  3.500000
std    NaN  0.707107
min    1.0  3.000000
25%    1.0  3.250000
50%    1.0  3.500000
75%    1.0  3.750000
max    1.0  4.000000
         A         B
count  1.0  2.000000
mean   1.0  3.500000
std    NaN  0.707107
min    1.0  3.000000
25%    1.0  3.250000
50%    1.0  3.500000
75%    1.0  3.750000
max    1.0  4.000000


### .shape
Attribute (used without parentheses) holding the dimensions of the DataFrame as a `(rows, columns)` tuple.

In [None]:
# Example 1: two rows and two columns -> (2, 2)
df = pd.DataFrame(data={'A': [1, 2], 'B': [3, 4]})
print(df.shape)

# Example 2: keeping only the rows where A > 1 reduces the row count, not the column count
filtered_df = df.loc[df['A'] > 1]
print(filtered_df.shape)


(2, 2)
(1, 2)


### .columns
Attribute (used without parentheses) holding the column labels of the DataFrame; assigning a new list to it renames the columns.

In [None]:
# Example 1: read the current column labels
df = pd.DataFrame(data={'A': [1, 2], 'B': [3, 4]})
print(df.columns)

# Example 2: assigning a list of the same length renames every column in place
df.columns = ['X', 'Y']
print(df.columns)


Index(['A', 'B'], dtype='object')
Index(['X', 'Y'], dtype='object')


### .iloc[]
Accesses rows and columns by integer position (0-based).

In [None]:
# Example 1: [row position, column position] -> the value in the 1st row, 2nd column
df = pd.DataFrame(data={'A': [1, 2], 'B': [3, 4]})
first_row_second_col = df.iloc[0, 1]
print(first_row_second_col)

# Example 2: ':' selects every row; 1 selects the 2nd column, returned as a Series
second_column = df.iloc[:, 1]
print(second_column)


3
0    3
1    4
Name: B, dtype: int64


### .loc[]
Accesses rows and columns by labels.

In [None]:
# Example 1: [row label, column label] -> the value at row 'row1', column 'B'
df = pd.DataFrame(data={'A': [1, 2], 'B': [3, 4]}, index=['row1', 'row2'])
print(df)
value_at_row1_b = df.loc['row1', 'B']
print(value_at_row1_b)

# Example 2: ':' selects every row; 'B' selects that column, returned as a Series
column_b = df.loc[:, 'B']
print(column_b)


      A  B
row1  1  3
row2  2  4
3
row1    3
row2    4
Name: B, dtype: int64


### .drop()
Returns a new DataFrame with the specified rows or columns removed; the original DataFrame is left unchanged.

In [None]:
# Example 1: remove column 'A'; drop returns a new frame and leaves df untouched
df = pd.DataFrame(data={'A': [1, 2], 'B': [3, 4]})
df_dropped = df.drop(columns='A')
print(df_dropped)

# Example 2: remove the row labelled 0 (df still has both columns)
df_dropped_row = df.drop(index=0)
print(df_dropped_row)


   B
0  3
1  4
   A  B
1  2  4


### .sort_values()
Sorts the DataFrame by the values of a column.

In [None]:
# Example 1: ascending order by column 'A'; each row keeps its original index label
df = pd.DataFrame(data={'A': [3, 1, 2], 'B': [6, 4, 5]})
sorted_df = df.sort_values('A')
print(sorted_df)

# Example 2: ascending=False reverses the order (largest 'A' first)
sorted_desc = df.sort_values('A', ascending=False)
print(sorted_desc)


   A  B
1  1  4
2  2  5
0  3  6
   A  B
0  3  6
2  2  5
1  1  4


### .groupby()
Groups the DataFrame using a column or index.

In [None]:
# Example 1: one group per distinct Category, then the mean of Values within each group
df = pd.DataFrame(data={'Category': ['A', 'A', 'B'], 'Values': [1, 2, 3]})
grouped = df.groupby(by='Category').mean()
print(grouped)

# Example 2: one group per (Category, Type) pair, then the sum of Values within each group
df = pd.DataFrame(data={
    'Category': ['A', 'A', 'B'],
    'Type': ['X', 'Y', 'X'],
    'Values': [1, 2, 3],
})
grouped = df.groupby(by=['Category', 'Type']).sum()
print(grouped)


          Values
Category        
A            1.5
B            3.0
               Values
Category Type        
A        X          1
         Y          2
B        X          3


### .merge()
Merges two DataFrames.

In [None]:
# Example 1: default (inner) join on the shared 'ID' column
df1 = pd.DataFrame(data={'ID': [1, 2], 'Value1': [10, 20]})
df2 = pd.DataFrame(data={'ID': [1, 2], 'Value2': [30, 40]})
merged = df1.merge(df2, on='ID')
print(merged)

# Example 2: how='left' keeps every row of df1; both frames share the same IDs,
# so the result matches Example 1
merged_left = df1.merge(df2, on='ID', how='left')
print(merged_left)


   ID  Value1  Value2
0   1      10      30
1   2      20      40
   ID  Value1  Value2
0   1      10      30
1   2      20      40


### .apply()
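Applies a function along an axis of a DataFrame (to each row or each column) or, when called on a Series as in the examples below, to each element.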

In [None]:
# Example 1: an inline lambda is called once per element of column 'A'
df = pd.DataFrame(data={'A': [1, 2, 3]})
df['Squared'] = df['A'].apply(lambda value: value ** 2)
print(df)


# Example 2: a named function can be passed in exactly the same way
def add_five(x):
    """Return x increased by 5."""
    return x + 5


df['AddFive'] = df['A'].apply(add_five)
print(df)


   A  Squared
0  1        1
1  2        4
2  3        9
   A  Squared  AddFive
0  1        1        6
1  2        4        7
2  3        9        8


### .isnull()
Checks for missing values in the DataFrame.

In [None]:
# Example 1: a boolean frame of the same shape, True wherever a value is missing
df = pd.DataFrame(data={'A': [1, None, 3]})
null_mask = df.isnull()
print(null_mask)

# Example 2: summing the mask counts the True entries, i.e. missing values per column
null_count = null_mask.sum()
print(null_count)


       A
0  False
1   True
2  False
A    1
dtype: int64


# Exercises

## NumPy Exercises

1. Create a 3x3 NumPy array filled with random integers between 1 and 10.

2. Reshape a 1D array of 12 elements into a 2D array of shape (3, 4).

3. Compute the mean, sum, and standard deviation of a given array.

4. Find the indices of all elements greater than 5 in a given array.

5. Perform matrix multiplication for two 2D arrays.

## Pandas Exercises

1. Read a CSV file into a Pandas DataFrame and display its first 5 rows.

2. Sort the DataFrame by a specific column in descending order.

3. Group the DataFrame by a column and calculate the mean of each group.

4. Drop rows with missing values in a given DataFrame.

5. Merge two DataFrames on a common column.