## IMPORT PANDAS

In [1]:
import pandas as pd
import numpy as np
pd.__version__

'2.2.3'

In [2]:
s = pd.Series([10,20,30])
s

0    10
1    20
2    30
dtype: int64

In [3]:
df = pd.DataFrame({
    "name":["Rupesh","Roshini"],
    "age":[19,20]
})
df

Unnamed: 0,name,age
0,Rupesh,19
1,Roshini,20


## WRITING THE DATA INTO FILES

In [4]:
df.to_csv('data.csv',index=False)

In [5]:
df.to_excel('new.xlsx',sheet_name = 'weather_data')

## READ THE FILE

In [6]:
df = pd.DataFrame({
    "City":["Kakinada","Anakapalli","Machilipatnam","Vizag","Pallevelugu"],
    "Temperature":[28,29,30,31,32],
    "Humidity":[60,62,58,55,57],
    "Condition":["Sunny","Sunny","Cloudy","Cloudy","Rainy"]
})
df

Unnamed: 0,City,Temperature,Humidity,Condition
0,Kakinada,28,60,Sunny
1,Anakapalli,29,62,Sunny
2,Machilipatnam,30,58,Cloudy
3,Vizag,31,55,Cloudy
4,Pallevelugu,32,57,Rainy


# EXPLORING DATA

In [7]:
df.head(3)

Unnamed: 0,City,Temperature,Humidity,Condition
0,Kakinada,28,60,Sunny
1,Anakapalli,29,62,Sunny
2,Machilipatnam,30,58,Cloudy


In [8]:
df.tail()

Unnamed: 0,City,Temperature,Humidity,Condition
0,Kakinada,28,60,Sunny
1,Anakapalli,29,62,Sunny
2,Machilipatnam,30,58,Cloudy
3,Vizag,31,55,Cloudy
4,Pallevelugu,32,57,Rainy


In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   City         5 non-null      object
 1   Temperature  5 non-null      int64 
 2   Humidity     5 non-null      int64 
 3   Condition    5 non-null      object
dtypes: int64(2), object(2)
memory usage: 292.0+ bytes


In [10]:
df['City'][df['Condition']=='cloudy']

Series([], Name: City, dtype: object)

In [11]:
df.shape

(5, 4)

In [12]:
df[2:4]

Unnamed: 0,City,Temperature,Humidity,Condition
2,Machilipatnam,30,58,Cloudy
3,Vizag,31,55,Cloudy


In [13]:
df.columns

Index(['City', 'Temperature', 'Humidity', 'Condition'], dtype='object')

In [14]:
df.index

RangeIndex(start=0, stop=5, step=1)

In [15]:
df.dtypes

City           object
Temperature     int64
Humidity        int64
Condition      object
dtype: object

In [16]:
df.City

0         Kakinada
1       Anakapalli
2    Machilipatnam
3            Vizag
4      Pallevelugu
Name: City, dtype: object

In [17]:
df.Temperature

0    28
1    29
2    30
3    31
4    32
Name: Temperature, dtype: int64

## MISSING VALUES

In [18]:
df.isnull()

Unnamed: 0,City,Temperature,Humidity,Condition
0,False,False,False,False
1,False,False,False,False
2,False,False,False,False
3,False,False,False,False
4,False,False,False,False


In [19]:
df = pd.DataFrame({
    "name":["Rupesh","Roshini","Lokesh"],
    "age":[19,20,None]
})
df

Unnamed: 0,name,age
0,Rupesh,19.0
1,Roshini,20.0
2,Lokesh,


In [20]:
df.isnull()

Unnamed: 0,name,age
0,False,False
1,False,False
2,False,True


In [21]:
df.isnull().sum()

name    0
age     1
dtype: int64

In [22]:
df[df.isnull().any(axis=1)]

Unnamed: 0,name,age
2,Lokesh,


In [23]:
df['age'] = df['age'].fillna(df['age'].mean())

In [24]:
df

Unnamed: 0,name,age
0,Rupesh,19.0
1,Roshini,20.0
2,Lokesh,19.5


In [25]:
df_clean = df.dropna()

In [26]:
df_clean

Unnamed: 0,name,age
0,Rupesh,19.0
1,Roshini,20.0
2,Lokesh,19.5


## DUPLICATES

In [27]:
df = pd.DataFrame({
    "name":["Rupesh","Roshini","Rupesh"],
    "age":[19,20,19]
})
df

Unnamed: 0,name,age
0,Rupesh,19
1,Roshini,20
2,Rupesh,19


In [28]:
df.duplicated()

0    False
1    False
2     True
dtype: bool

In [29]:
df[df.duplicated()]

Unnamed: 0,name,age
2,Rupesh,19


In [30]:
df = df.drop_duplicates()
df

Unnamed: 0,name,age
0,Rupesh,19
1,Roshini,20


## STATISTICS

In [31]:
df = pd.read_excel('weather_data_XL.xlsx')
df

Unnamed: 0,Date,City,Temperature_C,Humidity_%,Condition
0,2025-01-01,Hyderbad,28,60,Sunny
1,2025-01-02,Hyderbad,29,62,Sunny
2,2025-01-03,Hyderbad,30,58,Cloudy
3,2025-01-04,Hyderbad,31,55,Cloudy
4,2025-01-05,Banglore,32,57,Rainy
5,2025-01-06,Banglore,33,59,Rainy
6,2025-01-07,Banglore,34,61,Sunny
7,2025-01-08,Banglore,32,63,Cloudy
8,2025-01-09,Vijayawada,31,64,Rainy
9,2025-01-10,Vijayawada,29,62,Sunny


In [34]:
df['Temperature_C'].max()

34

In [35]:
df['Temperature_C'].min()

28

In [36]:
df['Temperature_C'].mean()

np.float64(30.9)

In [38]:
df['Temperature_C'].std()

1.911950719959998

In [39]:
df['Temperature_C'].var()

3.6555555555555554

In [40]:
df['Temperature_C'].describe()

count    10.000000
mean     30.900000
std       1.911951
min      28.000000
25%      29.250000
50%      31.000000
75%      32.000000
max      34.000000
Name: Temperature_C, dtype: float64

In [41]:
df[df.Temperature_C == df.Temperature_C.max()]

Unnamed: 0,Date,City,Temperature_C,Humidity_%,Condition
6,2025-01-07,Banglore,34,61,Sunny


In [42]:
df.City[df.Temperature_C == df.Temperature_C.max()]

6    Banglore
Name: City, dtype: object

In [43]:
df.count()

Date             10
City             10
Temperature_C    10
Humidity_%       10
Condition        10
dtype: int64

In [44]:
df["City"].nunique()

3

In [45]:
df["City"].value_counts()

City
Hyderbad      4
Banglore      4
Vijayawada    2
Name: count, dtype: int64

## STRING CLEANING

In [46]:
df

Unnamed: 0,Date,City,Temperature_C,Humidity_%,Condition
0,2025-01-01,Hyderbad,28,60,Sunny
1,2025-01-02,Hyderbad,29,62,Sunny
2,2025-01-03,Hyderbad,30,58,Cloudy
3,2025-01-04,Hyderbad,31,55,Cloudy
4,2025-01-05,Banglore,32,57,Rainy
5,2025-01-06,Banglore,33,59,Rainy
6,2025-01-07,Banglore,34,61,Sunny
7,2025-01-08,Banglore,32,63,Cloudy
8,2025-01-09,Vijayawada,31,64,Rainy
9,2025-01-10,Vijayawada,29,62,Sunny


In [50]:
df["City"]=df["City"].str.lower()
df

Unnamed: 0,Date,City,Temperature_C,Humidity_%,Condition
0,2025-01-01,hyderbad,28,60,Sunny
1,2025-01-02,hyderbad,29,62,Sunny
2,2025-01-03,hyderbad,30,58,Cloudy
3,2025-01-04,hyderbad,31,55,Cloudy
4,2025-01-05,banglore,32,57,Rainy
5,2025-01-06,banglore,33,59,Rainy
6,2025-01-07,banglore,34,61,Sunny
7,2025-01-08,banglore,32,63,Cloudy
8,2025-01-09,vijayawada,31,64,Rainy
9,2025-01-10,vijayawada,29,62,Sunny


In [51]:
df["City"]=df["City"].str.upper()
df

Unnamed: 0,Date,City,Temperature_C,Humidity_%,Condition
0,2025-01-01,HYDERBAD,28,60,Sunny
1,2025-01-02,HYDERBAD,29,62,Sunny
2,2025-01-03,HYDERBAD,30,58,Cloudy
3,2025-01-04,HYDERBAD,31,55,Cloudy
4,2025-01-05,BANGLORE,32,57,Rainy
5,2025-01-06,BANGLORE,33,59,Rainy
6,2025-01-07,BANGLORE,34,61,Sunny
7,2025-01-08,BANGLORE,32,63,Cloudy
8,2025-01-09,VIJAYAWADA,31,64,Rainy
9,2025-01-10,VIJAYAWADA,29,62,Sunny


In [54]:
df["City"]=df["City"].str.replace("HYDERABAD", "CHENNAI")
df

Unnamed: 0,Date,City,Temperature_C,Humidity_%,Condition
0,2025-01-01,HYDERBAD,28,60,Sunny
1,2025-01-02,HYDERBAD,29,62,Sunny
2,2025-01-03,HYDERBAD,30,58,Cloudy
3,2025-01-04,HYDERBAD,31,55,Cloudy
4,2025-01-05,BANGLORE,32,57,Rainy
5,2025-01-06,BANGLORE,33,59,Rainy
6,2025-01-07,BANGLORE,34,61,Sunny
7,2025-01-08,BANGLORE,32,63,Cloudy
8,2025-01-09,VIJAYAWADA,31,64,Rainy
9,2025-01-10,VIJAYAWADA,29,62,Sunny


## CONCATE DATA FRAMES

In [57]:
india_weather = pd.DataFrame({
    "City":["Mumbai","Bangalore","Hyderabad"],
    "Temperature":[32,45,35]
})
india_weather

Unnamed: 0,City,Temperature
0,Mumbai,32
1,Bangalore,45
2,Hyderabad,35


In [58]:
Us_weather = pd.DataFrame({
    "City":["Chicago","Orlando","New york"],
    "Temperature":[32,45,35]
})
Us_weather

Unnamed: 0,City,Temperature
0,Chicago,32
1,Orlando,45
2,New york,35


In [62]:
df = pd.concat([india_weather,Us_weather],ignore_index=True)
df

Unnamed: 0,City,Temperature
0,Mumbai,32
1,Bangalore,45
2,Hyderabad,35
3,Chicago,32
4,Orlando,45
5,New york,35


In [63]:
df = pd.concat([india_weather,Us_weather],axis=1)
df

Unnamed: 0,City,Temperature,City.1,Temperature.1
0,Mumbai,32,Chicago,32
1,Bangalore,45,Orlando,45
2,Hyderabad,35,New york,35


## MERGE DATAFRAMES

In [64]:
temp_df = pd.DataFrame({
    "City":["Hyderabad","Vijayawada","Vishakapatnam","chennai"],
    "Temperature":[32,45,35,30]
})
temp_df

Unnamed: 0,City,Temperature
0,Hyderabad,32
1,Vijayawada,45
2,Vishakapatnam,35
3,chennai,30


In [74]:
humidity_df = pd.DataFrame({
    "City":["Hyderabad","Vijayawada","Vishakapatnam","bangalore"],
    "Humidity":[62,65,75,60]
})
humidity_df

Unnamed: 0,City,Humidity
0,Hyderabad,62
1,Vijayawada,65
2,Vishakapatnam,75
3,bangalore,60


In [77]:
df = pd.merge(temp_df,humidity_df,on="City")
df

Unnamed: 0,City,Temperature,Humidity
0,Hyderabad,32,62
1,Vijayawada,45,65
2,Vishakapatnam,35,75


In [81]:
df = pd.merge(temp_df,humidity_df,on="City",how="outer")
df

Unnamed: 0,City,Temperature,Humidity
0,Hyderabad,32.0,62.0
1,Vijayawada,45.0,65.0
2,Vishakapatnam,35.0,75.0
3,bangalore,,60.0
4,chennai,30.0,


In [83]:
df.isnull()

Unnamed: 0,City,Temperature,Humidity
0,False,False,False
1,False,False,False
2,False,False,False
3,False,True,False
4,False,False,True


In [84]:
df.isnull().sum()

City           0
Temperature    1
Humidity       1
dtype: int64

In [87]:
df['Temperature']=df['Temperature'].fillna(df.Temperature.min())

In [88]:
df.isnull()

Unnamed: 0,City,Temperature,Humidity
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False
4,False,False,True


In [89]:
df

Unnamed: 0,City,Temperature,Humidity
0,Hyderabad,32.0,62.0
1,Vijayawada,45.0,65.0
2,Vishakapatnam,35.0,75.0
3,bangalore,30.0,60.0
4,chennai,30.0,
