# The Series Data Structure

In [40]:
# Import pandas under its conventional alias
import pandas as pd

# Tip: while exploring interactively, `pd.Series?` shows the Series docstring.
# It is left out of the executable cell because it is IPython-only syntax and
# opens the help pager every time the notebook is run from top to bottom.

In [41]:
# A Series built from a list of strings
animals = ["tiger", "lion", "bear"]
pd.Series(data=animals)

0    tiger
1     lion
2     bear
dtype: object

In [42]:
# A Series built from a list of integers
numbers = [1, 2, 3]
pd.Series(data=numbers)

0    1
1    2
2    3
dtype: int64

In [39]:
# A None inside a list of strings is kept as None and the Series dtype stays `object`
animals = ["tiger", "lion", None]
pd.Series(data=animals)


0    tiger
1     lion
2     None
dtype: object

In [43]:
# A None inside a list of numbers is converted to NaN ("not a number"),
# which makes the whole Series float64
numbers = [1, 2, None]
pd.Series(data=numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

In [11]:
# NaN is not the same thing as None: an equality test between them returns False
import numpy  as np
np.nan== None

False

In [13]:
# NaN therefore cannot be detected by comparing against None; use np.isnan() instead.
# (pandas uses NaN as the missing-value marker in a numeric Series.)
np.isnan(np.nan)

True

In [18]:
# Build a Series from a dictionary: the keys become the index, the values the data
sports = {
    'india': 'hockey',
    'australia': 'cricket',
    'england': 'cricket',
    'U.S.A': 'baseball',
}
s = pd.Series(data=sports)
s

U.S.A        baseball
australia     cricket
england       cricket
india          hockey
dtype: object

In [19]:
# The dictionary keys are available as the index labels of the Series
s.index

Index(['U.S.A', 'australia', 'england', 'india'], dtype='object')

In [20]:
# The index can also be set explicitly by passing a list of labels as `index`
s = pd.Series(
    data=['tiger', 'lion', 'bear'],
    index=['india', 'america', 'china'],
)
s

india      tiger
america     lion
china       bear
dtype: object

# Querying a Series 

In [21]:
# Re-create the country -> sport Series so it can be queried by position and by label
sports = {
    'india': 'hockey',
    'australia': 'cricket',
    'england': 'cricket',
    'U.S.A': 'baseball',
}
s = pd.Series(data=sports)
s

U.S.A        baseball
australia     cricket
england       cricket
india          hockey
dtype: object

In [22]:
# iloc selects by integer position (0-based), so this returns the fourth element.
# NOTE(review): which label sits at position 3 depends on the index order. The saved
# output reflects alphabetically sorted keys; newer pandas versions keep the dict's
# insertion order, so verify the result when re-running.
s.iloc[3]

'hockey'

In [25]:
# loc selects by index label rather than by position
s.loc['U.S.A']

'baseball'

In [27]:
# A Series of floating-point values with the default integer index
s = pd.Series(data=[100.0, 150.0, 200.0])
s

0    100.0
1    150.0
2    200.0
dtype: float64

In [29]:
# Accumulate the total by iterating over the Series one value at a time
total = 0
for value in s:
    total = total + value
print(total)

450.0


In [30]:
# Compute the sum of the values in s with NumPy's vectorised sum
import numpy as np
total= np.sum(s)
print(total)

450.0


In [31]:
# Draw 1000 random integers in [0, 100) from a seeded generator, so the cell
# produces the same values on every run (needed for Restart & Run All to
# reproduce the notebook). A local RandomState is used so the global NumPy
# random state is left untouched.
rng = np.random.RandomState(42)
s = pd.Series(rng.randint(0, 100, 1000))
s.head()

0    38
1    38
2     9
3    95
4    40
dtype: int32

In [33]:
# len() returns the number of elements in the Series
len(s)

1000

# The DataFrame Data Structure


In [34]:
import pandas as pd

# Each purchase is a Series; its labels (name, item, cost) become the DataFrame columns
purchase_1 = pd.Series(dict(name='vivek', item='shirt', cost=500))
purchase_2 = pd.Series(dict(name='murli', item='dog_food', cost=200))
purchase_3 = pd.Series(dict(name='ashish', item='shoes', cost=300))

# One row per purchase, labelled store1..store3 through the explicit index
df = pd.DataFrame(
    [purchase_1, purchase_2, purchase_3],
    index=['store1', 'store2', 'store3'],
)
df.head()


Unnamed: 0,cost,item,name
store1,500,shirt,vivek
store2,200,dog_food,murli
store3,300,shoes,ashish


In [38]:
# Select a single row by its index label; the row comes back as a Series
df.loc['store2']

cost         200
item    dog_food
name       murli
Name: store2, dtype: object

In [44]:
# Select a single column by name; the result is a Series indexed by the row labels
df['item']

store1       shirt
store2    dog_food
store3       shoes
Name: item, dtype: object

In [46]:
# loc[row_label, column_label] returns the single value at that row and column
df.loc['store1','item']

'shirt'

In [47]:
# .T transposes the frame: the rows become columns and the columns become rows
df.T

Unnamed: 0,store1,store2,store3
cost,500,200,300
item,shirt,dog_food,shoes
name,vivek,murli,ashish


In [49]:
# ':' selects every row; the list picks which columns to return and in what order
df.loc[:,['name', 'cost']]

Unnamed: 0,name,cost
store1,vivek,500
store2,murli,200
store3,ashish,300


In [50]:
# drop() returns a new DataFrame without the 'store1' row; df itself is not modified
df.drop('store1')

Unnamed: 0,cost,item,name
store2,200,dog_food,murli
store3,300,shoes,ashish


In [51]:
# The original DataFrame still holds all three rows, because drop() returned a copy
df

Unnamed: 0,cost,item,name
store1,500,shirt,vivek
store2,200,dog_food,murli
store3,300,shoes,ashish


# DataFrame Indexing and Loading


In [66]:
import os

import pandas as pd
import numpy as np

# Location of the Iris dataset. Set the IRIS_CSV environment variable to point at
# your own copy; the fallback is the original hard-coded path, so existing
# behaviour is unchanged when the variable is not set.
iris_csv_path = os.environ.get('IRIS_CSV', r'C:\Users\vivek.s\Desktop\Iris.csv')

# index_col=0 uses the first column of the file as the row index instead of a
# default RangeIndex. (The former skiprows=0 skipped nothing and was dropped.)
df = pd.read_csv(iris_csv_path, index_col=0)
df.head()


Unnamed: 0_level_0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,5.1,3.5,1.4,0.2,Iris-setosa
2,4.9,3.0,1.4,0.2,Iris-setosa
3,4.7,3.2,1.3,0.2,Iris-setosa
4,4.6,3.1,1.5,0.2,Iris-setosa
5,5.0,3.6,1.4,0.2,Iris-setosa


In [65]:
# Summarise the frame: info() prints dtypes and non-null counts, describe() returns summary statistics.
# NOTE(review): the saved output below shows 149 rows and columns named after the first
# data row (5.1, 3.5, ...), so it appears to come from an earlier load with different
# header settings. Re-run this cell after the read_csv cell to refresh it.
df.info()
df.describe()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 149 entries, 2 to 150
Data columns (total 5 columns):
5.1            149 non-null float64
3.5            149 non-null float64
1.4            149 non-null float64
0.2            149 non-null float64
Iris-setosa    149 non-null object
dtypes: float64(4), object(1)
memory usage: 7.0+ KB


Unnamed: 0,5.1,3.5,1.4,0.2
count,149.0,149.0,149.0,149.0
mean,5.848322,3.051007,3.774497,1.205369
std,0.828594,0.433499,1.759651,0.761292
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.4,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [58]:
# NOTE(review): this cell repeats the info()/describe() summary. Its saved output shows a
# RangeIndex and an 'Id' data column, i.e. it was produced from a load without
# index_col=0. Re-run it against the current df or remove the duplicate cell.
df.info()
df.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
Id               150 non-null int64
SepalLengthCm    150 non-null float64
SepalWidthCm     150 non-null float64
PetalLengthCm    150 non-null float64
PetalWidthCm     150 non-null float64
Species          150 non-null object
dtypes: float64(4), int64(1), object(1)
memory usage: 7.1+ KB


Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0,150.0
mean,75.5,5.843333,3.054,3.758667,1.198667
std,43.445368,0.828066,0.433594,1.76442,0.763161
min,1.0,4.3,2.0,1.0,0.1
25%,38.25,5.1,2.8,1.6,0.3
50%,75.5,5.8,3.0,4.35,1.3
75%,112.75,6.4,3.3,5.1,1.8
max,150.0,7.9,4.4,6.9,2.5
