## About Numpy

In [2]:
import numpy as np

In [10]:
arr1 = np.array([1, 2, 3, 6, 8])
print(arr1)
print(type(arr1))

print(arr1.shape)

arr1 = arr1.reshape((1, 5))
print(arr1.shape)

[1 2 3 6 8]
<class 'numpy.ndarray'>
(5,)
(1, 5)


In [15]:
arr2 = np.arange(0, 8, 2).reshape(1, 4)
print(arr2)

[[0 2 4 6]]


In [17]:
np.ones((3, 5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [18]:
np.eye(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [20]:
print(arr1.ndim)
print(arr2.ndim)

2
2


In [21]:
print(arr1.dtype)

int64


In [23]:
print(arr1.itemsize)
print(arr1)

8
[[1 2 3 6 8]]


In [26]:
# Numpy vectorized operations

arr1 = np.array([1, 4, 6, 8])
arr2 = np.array([3, 6, 8, 2])

print(arr1 + arr2)
print(arr1 - arr2)
print(arr1 * arr2)
print(arr1 / arr2)

[ 4 10 14 10]
[-2 -2 -2  6]
[ 3 24 48 16]
[0.33333333 0.66666667 0.75       4.        ]


In [33]:
# Some universal functions

arr = np.array([1, 2, 4, 8])

print(np.sqrt(arr))
print(np.exp(arr))
print(np.sin(arr))
print(np.log(arr))

[1.         1.41421356 2.         2.82842712]
[2.71828183e+00 7.38905610e+00 5.45981500e+01 2.98095799e+03]
[ 0.84147098  0.90929743 -0.7568025   0.98935825]
[0.         0.69314718 1.38629436 2.07944154]


In [38]:
arr = np.array([[1, 2, 3, 4],[5, 6, 7, 8],[9, 10, 11, 12]])

# print(arr)

print(arr[0:2, 1:3]) # Perform slicing

[[2 3]
 [6 7]]


In [40]:
print(arr)

arr[1:] = -20

print(arr)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
[[  1   2   3   4]
 [-20 -20 -20 -20]
 [-20 -20 -20 -20]]


In [44]:
data = np.array([1, 2, 3, 4, 5])

mean = np.mean(data)
# print(mean)

std_dev = np.std(data)
# print(std_dev)

normalized_data = (data - mean) / std_dev
print(normalized_data)

varience = np.var(data)
print(varience)


[-1.41421356 -0.70710678  0.          0.70710678  1.41421356]
2.0


In [52]:
# Logical operations

data = np.arange(1, 11)
print(data)

print(data[(data>5) & (data<=8)])

[ 1  2  3  4  5  6  7  8  9 10]
[6 7 8]


## About Pandas

In [2]:
import pandas as pd

In [3]:
data = [1, 2, 3, 4, 5]

# Creating series from 1-D array.
series = pd.Series(data)

print(data)
print(series)
print(type(series))

[1, 2, 3, 4, 5]
0    1
1    2
2    3
3    4
4    5
dtype: int64
<class 'pandas.core.series.Series'>


In [4]:
# Creating series from dictionary

data = {'a': 1, 'b': 2, 'c': 3}
series = pd.Series(data)

print(series)     


a    1
b    2
c    3
dtype: int64


In [5]:
data = [29, 47, 16]
index = ['aa', 'bb', 'cc']

series = pd.Series(data, index=index)

print(series)

aa    29
bb    47
cc    16
dtype: int64


In [1]:
# About dataframe

## Creating a dataframe through dictionary
import pandas as pd

data = {
      'Name' : ['Keshav', 'Mayank', "Raju"],
      'Age' : [39, 28, 63],
      'City' : ['Kanpur', 'Lucknow', 'Goa']
}

dataframe = pd.DataFrame(data)
dataframe


Unnamed: 0,Name,Age,City
0,Keshav,39,Kanpur
1,Mayank,28,Lucknow
2,Raju,63,Goa


In [2]:
## Creating a dataframe through list of dictionaries

data = [
      {'Name': 'Krish', 'Age': 32, "City": 'Banglore '},
      {'Name': 'Krish', 'Age': 32, "City": 'Banglore '},
      {'Name': 'Krish', 'Age': 32, "City": 'Banglore '},
      {'Name': 'Krish', 'Age': 32, "City": 'Banglore '}
]

df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,City
0,Krish,32,Banglore
1,Krish,32,Banglore
2,Krish,32,Banglore
3,Krish,32,Banglore


In [8]:
df = pd.read_csv('diabetes.csv')

# df.head(7)
df.tail(5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.34,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1
767,1,93,70,31,0,30.4,0.315,23,0


In [30]:
# How to access data from dataframe.

df = pd.DataFrame(data)
print(df)

# df['Name'] # It is actually a series (It prints entire column)

df.loc[0]

df.iloc[1][2]

    Name  Age       City
0  Krish   32  Banglore 
1  Krish   32  Banglore 
2  Krish   32  Banglore 
3  Krish   32  Banglore 


  df.iloc[1][2]


Unnamed: 0,Name,Age,City
0,Krish,32,Banglore
1,Krish,32,Banglore
2,Krish,32,Banglore
3,Krish,32,Banglore


In [40]:
df

print(df.at[2, 'City'])

Banglore 


In [39]:
print(df.iat[2, 2])

Banglore 


In [50]:
df

# Performing data manipulation with dataframe.

# Adding a column
df['Salary'] = [5000, 2894, 29324, 32824]
print(df)

# Removing a column
df.drop('Salary', axis=1, inplace=True)
print(df)

    Name  Age       City  Salary
0  Krish   32  Banglore     5000
1  Krish   32  Banglore     2894
2  Krish   32  Banglore    29324
3  Krish   32  Banglore    32824
    Name  Age       City
0  Krish   32  Banglore 
1  Krish   32  Banglore 
2  Krish   32  Banglore 
3  Krish   32  Banglore 


In [114]:
# Update the value of whole column

print(df)

df['Age'] = df['Age'] + 1
print(df)

    Name  Age       City
0  Krish   94  Banglore 
1  Krish   94  Banglore 
2  Krish   94  Banglore 
3  Krish   94  Banglore 
    Name  Age       City
0  Krish   95  Banglore 
1  Krish   95  Banglore 
2  Krish   95  Banglore 
3  Krish   95  Banglore 


In [118]:
# How to drop the whole row.

# print(df)

# df.drop(1, axis=0, inplace=True)

# print(df)

print(df.describe())

        Age
count   3.0
mean   95.0
std     0.0
min    95.0
25%    95.0
50%    95.0
75%    95.0
max    95.0


## Data manipulation and analysis with pandas.

In [3]:
import pandas as pd

In [4]:
df = pd.read_csv('data.csv')

# df.describe()
df.dtypes

Date         object
Category     object
Value       float64
Product      object
Sales       float64
Region       object
dtype: object

In [7]:
# Handling missing values

df.isnull().any(axis=0)

Date        False
Category    False
Value        True
Product     False
Sales        True
Region      False
dtype: bool

In [128]:
df.isnull().sum()

Date        0
Category    0
Value       3
Product     0
Sales       4
Region      0
dtype: int64

In [137]:
# Filling missing values with 0.

print(df.isnull().any())

df_filled = df.fillna(0)

df_filled.isnull().any()

Date        False
Category    False
Value        True
Product     False
Sales        True
Region      False
dtype: bool


Date        False
Category    False
Value       False
Product     False
Sales       False
Region      False
dtype: bool

In [143]:
df

df.isnull().any()

df['Value_updated'] = df['Value'].fillna(df['Value'].mean())
df

Unnamed: 0,Date,Category,Value,Product,Sales,Region,Value_updated
0,2023-01-01,A,28.0,Product1,754.0,East,28.0
1,2023-01-02,B,39.0,Product3,110.0,North,39.0
2,2023-01-03,C,32.0,Product2,398.0,East,32.0
3,2023-01-04,B,8.0,Product1,522.0,East,8.0
4,2023-01-05,B,26.0,Product3,869.0,North,26.0
5,2023-01-06,B,54.0,Product3,192.0,West,54.0
6,2023-01-07,A,16.0,Product1,936.0,East,16.0
7,2023-01-08,C,89.0,Product1,488.0,West,89.0
8,2023-01-09,C,37.0,Product3,772.0,West,37.0
9,2023-01-10,A,22.0,Product2,834.0,West,22.0


In [19]:
# How to rename columns in dataframe.Age

df = df.rename(columns={'Sales': 'Sales_Data'})
df.head(10)

Unnamed: 0,Date,Category,Value,Product,Sales_Data,Region
0,2023-01-01,A,28.0,Product1,754.0,East
1,2023-01-02,B,39.0,Product3,110.0,North
2,2023-01-03,C,32.0,Product2,398.0,East
3,2023-01-04,B,8.0,Product1,522.0,East
4,2023-01-05,B,26.0,Product3,869.0,North
5,2023-01-06,B,54.0,Product3,192.0,West
6,2023-01-07,A,16.0,Product1,936.0,East
7,2023-01-08,C,89.0,Product1,488.0,West
8,2023-01-09,C,37.0,Product3,772.0,West
9,2023-01-10,A,22.0,Product2,834.0,West


In [22]:
# How to change the datatypes in df.

df['Value_New'] = df['Value'].fillna(df['Value'].mean()).astype(int)
df

Unnamed: 0,Date,Category,Value,Product,Sales_Data,Region,Value_New
0,2023-01-01,A,28.0,Product1,754.0,East,28
1,2023-01-02,B,39.0,Product3,110.0,North,39
2,2023-01-03,C,32.0,Product2,398.0,East,32
3,2023-01-04,B,8.0,Product1,522.0,East,8
4,2023-01-05,B,26.0,Product3,869.0,North,26
5,2023-01-06,B,54.0,Product3,192.0,West,54
6,2023-01-07,A,16.0,Product1,936.0,East,16
7,2023-01-08,C,89.0,Product1,488.0,West,89
8,2023-01-09,C,37.0,Product3,772.0,West,37
9,2023-01-10,A,22.0,Product2,834.0,West,22


In [24]:
# How to do modification in the data of the column.

df.head(6)

df['Updated_Value'] = df['Value'].apply(lambda x: x*2)
df

Unnamed: 0,Date,Category,Value,Product,Sales_Data,Region,Value_New,Updated_Value
0,2023-01-01,A,28.0,Product1,754.0,East,28,56.0
1,2023-01-02,B,39.0,Product3,110.0,North,39,78.0
2,2023-01-03,C,32.0,Product2,398.0,East,32,64.0
3,2023-01-04,B,8.0,Product1,522.0,East,8,16.0
4,2023-01-05,B,26.0,Product3,869.0,North,26,52.0
5,2023-01-06,B,54.0,Product3,192.0,West,54,108.0
6,2023-01-07,A,16.0,Product1,936.0,East,16,32.0
7,2023-01-08,C,89.0,Product1,488.0,West,89,178.0
8,2023-01-09,C,37.0,Product3,772.0,West,37,74.0
9,2023-01-10,A,22.0,Product2,834.0,West,22,44.0
