# Series & DataFrame using NumPy & Pandas

In [1]:
import numpy as np

In [2]:
# 1-D integer array holding 1 through 9 (the stop value 10 is excluded).
ls = np.arange(1, 10, 1)

In [3]:
ls

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [4]:
ls[7]

8

In [5]:
ls.dtype

dtype('int32')

In [11]:
# Print every element of `ls` next to its position in the array.
# enumerate() supplies the real index; deriving it from the value
# (item - 1) is only correct while the array happens to hold 1..n in order.
for position, item in enumerate(ls):
    print(f"ls[{position}] = ", item)

ls[0] =  1
ls[1] =  2
ls[2] =  3
ls[3] =  4
ls[4] =  5
ls[5] =  6
ls[6] =  7
ls[7] =  8
ls[8] =  9


In [12]:
len(ls)

9

# Pandas

In [13]:
import pandas as pd

# Series

In [61]:
# Series built from a plain list; pandas assigns the default 0..n-1 index.
s1 = pd.Series(data=[1, 3, 4, 7])

In [15]:
s1

0    1
1    3
2    4
3    7
dtype: int64

In [20]:
# Series with explicit string labels instead of the default integer index.
s2 = pd.Series(
    data=[2, 3, 4],
    index=["Feb", "March", "April"],
)

In [21]:
s2

Feb      2
March    3
April    4
dtype: int64

In [22]:
# Small NumPy array that is converted into a Series in a later cell.
arr = np.array(
    [1, 2, 3, 4, 5]
)

In [23]:
arr

array([1, 2, 3, 4, 5])

In [27]:
# Convert the NumPy array into a Series, labelling its elements 5..9
# instead of using the default 0..4 index.
s3 = pd.Series(data=arr, index=[5, 6, 7, 8, 9])

In [28]:
s3

5    1
6    2
7    3
8    4
9    5
dtype: int32

In [42]:
# One temperature reading per weekday; the dict keys become the index labels
# (insertion order is kept, so the Series runs Sun..Sat).
temperature_celsius = pd.Series(
    {
        'Sun': 25,
        'Mon': 26,
        'Tue': 27,
        'Wed': 28,
        'Thu': 29,
        'Fri': 30,
        'Sat': 31,
    }
)

In [43]:
temperature_celsius

Sun    25
Mon    26
Tue    27
Wed    28
Thu    29
Fri    30
Sat    31
dtype: int64

In [44]:
temperature_celsius.head(3)

Sun    25
Mon    26
Tue    27
dtype: int64

In [45]:
temperature_celsius.tail(3)

Thu    29
Fri    30
Sat    31
dtype: int64

In [46]:
temperature_celsius.describe()

count     7.000000
mean     28.000000
std       2.160247
min      25.000000
25%      26.500000
50%      28.000000
75%      29.500000
max      31.000000
dtype: float64

In [47]:
temperature_celsius.isnull()

Sun    False
Mon    False
Tue    False
Wed    False
Thu    False
Fri    False
Sat    False
dtype: bool

In [48]:
# Same kind of weekly readings, but with Thursday missing (NaN) so the
# isnull()/notnull() cells below have something to detect.
temperatures_celsius = pd.Series(
    {
        'Mon': 25,
        'Tue': 26,
        'Wed': 27,
        'Thu': np.nan,
        'Fri': 29,
        'Sat': 30,
        'Sun': 31,
    }
)

In [49]:
temperatures_celsius

Mon    25.0
Tue    26.0
Wed    27.0
Thu     NaN
Fri    29.0
Sat    30.0
Sun    31.0
dtype: float64

In [50]:
temperatures_celsius.isnull()

Mon    False
Tue    False
Wed    False
Thu     True
Fri    False
Sat    False
Sun    False
dtype: bool

In [51]:
temperatures_celsius.notnull()

Mon     True
Tue     True
Wed     True
Thu    False
Fri     True
Sat     True
Sun     True
dtype: bool

In [52]:
# Five-element Series (1..5) with the default integer index,
# used by the indexing/slicing cells that follow.
s = pd.Series(data=[1, 2, 3, 4, 5])

In [53]:
s

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [54]:
s[0]

1

In [55]:
s[:2]

0    1
1    2
dtype: int64

In [56]:
s[3:]

3    4
4    5
dtype: int64

In [58]:
s[:-1]

0    1
1    2
2    3
3    4
dtype: int64

In [60]:
# Label-based slice: both end labels ("Wed" and "Fri") are included.
temperatures_celsius.loc["Wed":"Fri"]

Wed    27.0
Thu     NaN
Fri    29.0
dtype: float64

# Array to DataFrame (2D Array)

In [4]:
import numpy as np
import pandas as pd

In [5]:
# Two equal-length arrays: a1 supplies the values, a2 the row labels
# of the DataFrame built in the next cell.
a1, a2 = np.array([2, 3, 4]), np.array([5, 6, 7])

In [8]:
# Single-column DataFrame: a1 holds the values, a2 is used as the row index.
# The keyword makes it explicit that the second argument is the index.
d1 = pd.DataFrame(data=a1, index=a2)

In [9]:
d1

Unnamed: 0,0
5,2
6,3
7,4


In [13]:
# Column-oriented records: each key becomes a DataFrame column.
data = dict(
    calories=[420, 380, 230],
    duration=[50, 40, 45],
)

In [14]:
data

{'calories': [420, 380, 230], 'duration': [50, 40, 45]}

In [15]:
# Build a DataFrame from the dict: keys -> columns, lists -> column values.
df = pd.DataFrame(data=data)

In [16]:
df

Unnamed: 0,calories,duration
0,420,50
1,380,40
2,230,45


In [2]:
import numpy as np
import pandas as pd

In [3]:
# Employee records in column-oriented form; every list has one entry per person.
data = {
    "Name": ["John", "Emma", "Ryan", "Sophia", "Liam"],
    "Age": [25, 30, 28, 35, 40],
    "Gender": ["M", "F", "M", "F", "M"],
    "Salary": [50000, 60000, 55000, 70000, 65000],
}

In [4]:
data

{'Name': ['John', 'Emma', 'Ryan', 'Sophia', 'Liam'],
 'Age': [25, 30, 28, 35, 40],
 'Gender': ['M', 'F', 'M', 'F', 'M'],
 'Salary': [50000, 60000, 55000, 70000, 65000]}

In [13]:
# Build the employee DataFrame: dict keys -> columns, default 0..4 row index.
df = pd.DataFrame(data=data)

In [15]:
df

Unnamed: 0,Name,Age,Gender,Salary
0,John,25,M,50000
1,Emma,30,F,60000
2,Ryan,28,M,55000
3,Sophia,35,F,70000
4,Liam,40,M,65000


In [16]:
df.shape

(5, 4)

In [17]:
df.columns

Index(['Name', 'Age', 'Gender', 'Salary'], dtype='object')

In [18]:
df.index

RangeIndex(start=0, stop=5, step=1)

In [19]:
df['Salary'].mean()   # find Mean Value 

60000.0

In [20]:
np.mean(df['Salary'])   # find Mean Value 

60000.0

In [22]:
df.describe()

Unnamed: 0,Age,Salary
count,5.0,5.0
mean,31.6,60000.0
std,5.94138,7905.69415
min,25.0,50000.0
25%,28.0,55000.0
50%,30.0,60000.0
75%,35.0,65000.0
max,40.0,70000.0


In [23]:
df['Age']

0    25
1    30
2    28
3    35
4    40
Name: Age, dtype: int64

In [24]:
# Pull the 'Age' column out as a Series (all rows, one column).
age = df.loc[:, 'Age']

In [25]:
age

0    25
1    30
2    28
3    35
4    40
Name: Age, dtype: int64

In [30]:
# Passing a list of column names yields a DataFrame with just those columns.
df1 = df.loc[:, ['Age', 'Salary']]

In [31]:
df1

Unnamed: 0,Age,Salary
0,25,50000
1,30,60000
2,28,55000
3,35,70000
4,40,65000


In [35]:
# loc[:, cols] = all rows, only the listed columns (Name & Age).
selected_columns_loc = df.loc[
    :,
    ['Name', 'Age'],
]

In [36]:
selected_columns_loc

Unnamed: 0,Name,Age
0,John,25
1,Emma,30
2,Ryan,28
3,Sophia,35
4,Liam,40


In [38]:
# loc[:, cols] = all rows, only the listed columns (Name & Salary).
selected_columns = df.loc[
    :,
    ['Name', 'Salary'],
]

In [39]:
selected_columns

Unnamed: 0,Name,Salary
0,John,50000
1,Emma,60000
2,Ryan,55000
3,Sophia,70000
4,Liam,65000


In [42]:
# loc[2:4, cols] = rows labelled 2 through 4 (loc slices include the end
# label, so row 4 is returned too) and only the Name & Age columns.
selected_columns = df.loc[
    2:4,
    ['Name', 'Age'],
]

In [43]:
selected_columns

Unnamed: 0,Name,Age
2,Ryan,28
3,Sophia,35
4,Liam,40


In [46]:
# loc[2:4, :] = rows labelled 2 through 4 (end label included) & all columns.
selected_columns = df.loc[2:4, :]

In [47]:
selected_columns

Unnamed: 0,Name,Age,Gender,Salary
2,Ryan,28,M,55000
3,Sophia,35,F,70000
4,Liam,40,M,65000


In [48]:
# Boolean filter: keep only the rows whose Age is strictly greater than 30.
older_than_30 = df.loc[df['Age'].gt(30)]

In [49]:
older_than_30

Unnamed: 0,Name,Age,Gender,Salary
3,Sophia,35,F,70000
4,Liam,40,M,65000


In [57]:
# Boolean filter: keep rows whose Salary is at least 60000 (60000 itself included).
good_salary = df.loc[df['Salary'].ge(60000)]

In [58]:
good_salary

Unnamed: 0,Name,Age,Gender,Salary
1,Emma,30,F,60000
3,Sophia,35,F,70000
4,Liam,40,M,65000


In [61]:
# Shows only the "Name" column (as a one-column DataFrame, all rows).
# NOTE(review): this rebinds `good_salary`, which an earlier cell set to the
# Salary >= 60000 rows; the name no longer describes the contents.
good_salary = df.loc[
    :,
    ['Name'],
]

In [62]:
good_salary

Unnamed: 0,Name
0,John
1,Emma
2,Ryan
3,Sophia
4,Liam


In [66]:
# Boolean filter: rows where Gender is "M".
male = df.loc[df['Gender'].eq('M')]

In [65]:
male

Unnamed: 0,Name,Age,Gender,Salary
0,John,25,M,50000
2,Ryan,28,M,55000
4,Liam,40,M,65000


In [67]:
# Boolean filter: rows where Gender is "F".
female = df.loc[df['Gender'].eq('F')]

In [68]:
female

Unnamed: 0,Name,Age,Gender,Salary
1,Emma,30,F,60000
3,Sophia,35,F,70000
