# Data Analysis - Major League Baseball Data

## Series

In [87]:
import pandas as pd
import numpy as np

# Creating an empty series, will result in DeprecationWarning
#series = pd.Series()

# Passing dtype as a parameter to Series for an empty series to avoid DeprecationWarning
# Creating an empty series
series = pd.Series(dtype='float64')
# Newline to separate series print statements
print('{}\n'.format(series))


Series([], dtype: float64)



In [88]:
series = pd.Series(5)
print('{}\n'.format(series))

0    5
dtype: int64



In [89]:
series = pd.Series([1, 2, 3])
print('{}\n'.format(series))

0    1
1    2
2    3
dtype: int64



In [90]:
series = pd.Series([1, 2.2]) # upcasting
print('{}\n'.format(series))

0    1.0
1    2.2
dtype: float64



In [91]:
arr = np.array([1, 2])
series = pd.Series(arr, dtype=np.float32)
print('{}\n'.format(series))

0    1.0
1    2.0
dtype: float32



In [92]:
series = pd.Series([[1, 2], [3, 4]])
print('{}\n'.format(series))

series = pd.Series(["Vee","Pree","Fluffy"])
print('{}\n'.format(series))

0    [1, 2]
1    [3, 4]
dtype: object

0       Vee
1      Pree
2    Fluffy
dtype: object



In [93]:
# custom indexing
series = pd.Series(["Albert","Duke","Jerry"],index=[1,2,3])
print('{} \n'.format(series))

1    Albert
2      Duke
3     Jerry
dtype: object 



In [94]:
#index can be made of any static type
series = pd.Series([1, 2, 3], index=['a', 8, 0.3])
print('{}\n'.format(series))

a      1
8      2
0.3    3
dtype: int64



### Exercise

In [95]:
s1 = pd.Series([1,3,5.2])
print('{}\n'.format(s1))

s2 = s1 * pd.Series([0.1,0.2,0.3])
print('{}\n'.format(s2))

s3 = pd.Series([1,3,8,np.nan],index = ['a', 'b', 'c', 'd'])
print('{}\n'.format(s3))

s4 = pd.Series({'a':0 , 'b':1 , 'c':2})
print('{}\n'.format(s4))


0    1.0
1    3.0
2    5.2
dtype: float64

0    0.10
1    0.60
2    1.56
dtype: float64

a    1.0
b    3.0
c    8.0
d    NaN
dtype: float64

a    0
b    1
c    2
dtype: int64



## DataFrame

In [96]:
df = pd.DataFrame()

In [97]:
df = pd.DataFrame([1,2,3])
print('{}\n'.format(df))

   0
0  1
1  2
2  3



In [98]:
df = pd.DataFrame([[1,2,3]])
print('{}\n'.format(df))

   0  1  2
0  1  2  3



In [99]:
df = pd.DataFrame([[1,2],[5,6]],columns =['c1','c2'] , index =['r1','r2'])
print('{}\n'.format(df))

    c1  c2
r1   1   2
r2   5   6



In [100]:
df = pd.DataFrame({'c1':[1,2],'c2':[4,5]},index=['r1','r2'])
print('{}\n'.format(df))

    c1  c2
r1   1   4
r2   2   5



### Upcasting Dataframe

In [101]:
upcast = pd.DataFrame([[5, 6], [1.2, 3]])

In [102]:
print('{}\n'.format(upcast))
print('\n',upcast.dtypes)

     0  1
0  5.0  6
1  1.2  3


 0    float64
1      int64
dtype: object


In [103]:
df = pd.DataFrame([[5, 6], [1.2, 3]])
ser = pd.Series([0, 0], name='r3')

#df_app = df.append(ser)
#print('{}\n'.format(df_app))

### Concatinating DataFrame

In [104]:

df = pd.DataFrame([[5, 6], [1.2, 3]])
ser = pd.Series([0, 0], name='r3')

df_app = pd.concat([df,ser],ignore_index = True)
print('{}\n'.format(df_app))

     0    1
0  5.0  6.0
1  1.2  3.0
2  0.0  NaN
3  0.0  NaN



In [105]:
df2 = pd.DataFrame([[0,0],[9,9]])
df_app = pd.concat([df_app,df2])
print('{}\n'.format(df_app))



     0    1
0  5.0  6.0
1  1.2  3.0
2  0.0  NaN
3  0.0  NaN
0  0.0  0.0
1  9.0  9.0



### Dropping Data

In [106]:
df = pd.DataFrame({'c1': [1, 2], 'c2': [3, 4],
                   'c3': [5, 6]},
                  index=['r1', 'r2'])
print('{}\n'.format(df))


    c1  c2  c3
r1   1   3   5
r2   2   4   6



In [107]:
df_drop = df.drop(labels='r1')
print('{}\n'.format(df_drop))


    c1  c2  c3
r2   2   4   6



In [108]:

# Drop columns c1, c3
df_drop = df.drop(labels=['c1', 'c3'], axis=1)
print('{}\n'.format(df_drop))


    c2
r1   3
r2   4



In [109]:


df_drop = df.drop(columns='c2')
print('{}\n'.format(df_drop))


    c1  c3
r1   1   5
r2   2   6



In [110]:

df.drop(index='r2', columns='c2')
print('{}\n'.format(df_drop))

    c1  c3
r1   1   5
r2   2   6



In [111]:
df =pd.DataFrame({'c1':[0,1,2,3],'c2':[5,6,7,8]},index=['r1', 'r2', 'r3', 'r4'])
print('{}\n'.format(df))

    c1  c2
r1   0   5
r2   1   6
r3   2   7
r4   3   8



In [112]:
row_df =  pd.DataFrame([[9,9]],columns=['c1', 'c2'],index=['r5'])
print('{}\n'.format(row_df))

    c1  c2
r5   9   9



In [113]:
#df_app = df.append(row_df)
df_app = pd.concat([df,row_df])
df_drop = df_app.drop(labels='r2')

print('{}\n'.format(df_app))
print('{}\n'.format(df_drop))

    c1  c2
r1   0   5
r2   1   6
r3   2   7
r4   3   8
r5   9   9

    c1  c2
r1   0   5
r3   2   7
r4   3   8
r5   9   9

