# Pandas Series
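A Pandas Series is a one-dimensional labelled array. It can hold values of mixed types (stored with dtype `object`), and each element can be accessed through its index label. A NumPy array, by contrast, has a single dtype for all of its elements and is indexed only by integer position.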

In [1]:
import pandas as pd

# Build a Series that mixes integers and strings, labelled by grocery item.
labels = ['eggs', 'apples', 'milk', 'bread']
values = [30, 6, 'Yes', 'No']
X = pd.Series(values, index=labels)
print(X)

eggs       30
apples      6
milk      Yes
bread      No
dtype: object


In [2]:
# Basic structural attributes of the Series.
print(X.shape)  # tuple with the length of each dimension
print(X.ndim)   # number of dimensions
print(X.size)   # total number of elements

(4,)
1
4


In [3]:
print(X.values)  # the stored data, without labels
print(X.index)   # the index labels

[30 6 'Yes' 'No']
Index(['eggs', 'apples', 'milk', 'bread'], dtype='object')


In [4]:
# `in` tests membership against the index labels, not the stored values,
# which is why `30 in X` is False even though 30 is one of the values.
print('bananas' in X)
print('eggs' in X)
print(30 in X)

False
True
False


In [5]:
print(X['eggs'])            # a single label returns the stored value
print(X[['milk','bread']])  # a list of labels returns a Series

30
milk     Yes
bread     No
dtype: object


In [6]:
# .loc makes it explicit that the selection is by label.
print(X.loc[['milk','bread']])

milk     Yes
bread     No
dtype: object


In [7]:
# Select the first two elements by integer position. `.iloc` is used because
# X has string labels: passing integers to plain `[]` on such a Series relies
# on a positional fallback that newer pandas releases deprecate.
print(X.iloc[[0, 1]])

eggs      30
apples     6
dtype: object


In [8]:
# Select the last element by position (returned as a one-element Series).
# `.iloc` is used because integer keys passed to plain `[]` on a
# string-labelled Series rely on a deprecated positional fallback.
print(X.iloc[[-1]])

bread    No
dtype: object


In [9]:
# Read the first element by position. `.iloc` is explicit about positional
# access; `X[0]` on a string-labelled Series relies on a deprecated fallback.
print(X.iloc[0])

30


In [10]:
# .iloc selects by integer position (here the third and fourth elements).
print(X.iloc[[2,3]])

milk     Yes
bread     No
dtype: object


In [11]:
# Series are mutable: assigning through a label changes X itself.
print(X)
X['eggs'] = 4
print(X)

eggs       30
apples      6
milk      Yes
bread      No
dtype: object
eggs        4
apples      6
milk      Yes
bread      No
dtype: object


In [12]:
X.drop('apples') # Returns a new Series without 'apples'; X itself is not modified.

eggs       4
milk     Yes
bread     No
dtype: object

In [13]:
# X still contains 'apples': the previous drop did not modify it.
X

eggs        4
apples      6
milk      Yes
bread      No
dtype: object

In [14]:
X.drop('apples', inplace=True) # Permanently removes 'apples' by modifying X in place.
X

eggs       4
milk     Yes
bread     No
dtype: object

In [15]:
# A purely numeric Series: item counts labelled by fruit name.
fruit_names = ['apples', 'oranges', 'bananas']
fruit_counts = [10, 6, 3]
fruits = pd.Series(fruit_counts, index=fruit_names)
print(fruits)

apples     10
oranges     6
bananas     3
dtype: int64


In [16]:
# Arithmetic with a scalar is applied to every element and produces a new
# Series; `fruits` is printed first and last to show that it is unchanged.
arithmetic_results = [
    fruits,
    fruits + 2,
    fruits - 4,
    fruits * 3,
    fruits / 2,
    fruits,
]
# One blank line between consecutive results.
print(*arithmetic_results, sep='\n\n')

apples     10
oranges     6
bananas     3
dtype: int64

apples     12
oranges     8
bananas     5
dtype: int64

apples     6
oranges    2
bananas   -1
dtype: int64

apples     30
oranges    18
bananas     9
dtype: int64

apples     5.0
oranges    3.0
bananas    1.5
dtype: float64

apples     10
oranges     6
bananas     3
dtype: int64


In [17]:
import numpy as np

# NumPy functions accept a Series and are applied to every element; each call
# produces a new Series and leaves `fruits` as it was.
exponentials = np.exp(fruits)
square_roots = np.sqrt(fruits)
cubes = np.power(fruits, 3)

print('e to the x \n{}\n'.format(exponentials))
print('Square Root \n{}\n'.format(square_roots))
print('Raised to Power of 3 \n{}\n'.format(cubes))
print('Original Array \n{}'.format(fruits))

e to the x 
apples     22026.465795
oranges      403.428793
bananas       20.085537
dtype: float64

Square Root 
apples     3.162278
oranges    2.449490
bananas    1.732051
dtype: float64

Raised to Power of 3 
apples     1000
oranges     216
bananas      27
dtype: int64

Original Array 
apples     10
oranges     6
bananas     3
dtype: int64


In [18]:
# Arithmetic on selected elements; none of these expressions modify `fruits`.
print(fruits,'\n')
print(fruits['bananas']+2,'\n') # 3 bananas + 2
print(fruits.iloc[0]-2,'\n')    # 10 apples - 2
print(fruits[['apples','oranges']]*2,'\n')      # doubles apples and oranges
print(fruits.loc[['apples','oranges']]/2,'\n')  # halves them; division gives floats
print(fruits)

apples     10
oranges     6
bananas     3
dtype: int64 

5 

8 

apples     20
oranges    12
dtype: int64 

apples     5.0
oranges    3.0
dtype: float64 

apples     10
oranges     6
bananas     3
dtype: int64


In [19]:
# On a mixed-type Series `*` acts on each element according to its own type:
# the integer is doubled while the strings are repeated.
print(X*2,'\n')
print(X)

eggs          8
milk     YesYes
bread      NoNo
dtype: object 

eggs       4
milk     Yes
bread     No
dtype: object


# Pandas DataFrames
A DataFrame is a two-dimensional data structure with labeled rows and columns; each column can hold a different data type.

In [20]:
# Each person's cart is a Series of values keyed by item name.
bob_cart = pd.Series(data=[245, 25, 55], index=['bike', 'pants', 'watch'])
alice_cart = pd.Series(data=[40, 110, 500, 45],
                       index=['book', 'glasses', 'bike', 'pants'])

# Collect the carts in a plain dictionary keyed by owner.
items = {'Bob': bob_cart, 'Alice': alice_cart}

# Confirm that `items` is an ordinary Python dict.
print(type(items))

<class 'dict'>


In [21]:
# Build a DataFrame from the dict of Series: the dict keys become the columns
# and the item labels become the rows; an item missing from a cart shows as NaN.
shopping_carts = pd.DataFrame(items)
shopping_carts

Unnamed: 0,Bob,Alice
bike,245.0,500.0
book,,40.0
glasses,,110.0
pants,25.0,45.0
watch,55.0,


In [22]:
# print() shows the plain-text rendering of the same DataFrame.
print(shopping_carts)

           Bob  Alice
bike     245.0  500.0
book       NaN   40.0
glasses    NaN  110.0
pants     25.0   45.0
watch     55.0    NaN


In [23]:
# Series built without an explicit index, so each one is labelled 0, 1, 2, ...
data = {
    'Bob': pd.Series([245, 25, 55]),
    'Alice': pd.Series([40, 110, 500, 45]),
}

# Combine them into a DataFrame; rows are matched on the default integer labels.
df = pd.DataFrame(data)

# Show the result.
df

Unnamed: 0,Bob,Alice
0,245.0,40
1,25.0,110
2,55.0,500
3,,45


In [24]:
# Without explicit indexes the rows are labelled 0 to 3.
print(df)

     Bob  Alice
0  245.0     40
1   25.0    110
2   55.0    500
3    NaN     45


In [25]:
# Structure, raw values, and axis labels of the DataFrame.
print('Shape: ', shopping_carts.shape)
print('Dimension: ', shopping_carts.ndim)
print('Size: ', shopping_carts.size)  # rows x columns, NaN entries included
print('\nValues: \n', shopping_carts.values)
print('\nRow Index: ', shopping_carts.index)
print('Column Index: ', shopping_carts.columns)

Shape:  (5, 2)
Dimension:  2
Size:  10

Values: 
 [[245. 500.]
 [ nan  40.]
 [ nan 110.]
 [ 25.  45.]
 [ 55.  nan]]

Row Index:  Index(['bike', 'book', 'glasses', 'pants', 'watch'], dtype='object')
Column Index:  Index(['Bob', 'Alice'], dtype='object')


In [26]:
# `columns` restricts the DataFrame to the listed dictionary keys.
bob_shopping_cart = pd.DataFrame(items, columns = ['Bob'])
bob_shopping_cart

Unnamed: 0,Bob
bike,245
pants,25
watch,55


In [27]:
# `index` restricts the DataFrame to the listed row labels.
# NOTE(review): the Series in `items` use the label 'book', not 'books', so the
# 'books' row is all NaN — confirm whether this mismatch is intentional.
selected_items_cart = pd.DataFrame(items, index =['pants','books'])
selected_items_cart

Unnamed: 0,Bob,Alice
pants,25.0,45.0
books,,


In [28]:
# One dictionary per store, mapping item name to a value.
store_1 = {'bikes': 20, 'pants': 30, 'watches': 35}
store_2 = {'watches': 10, 'glasses': 50, 'bikes': 15, 'pants': 5}
items2 = [store_1, store_2]

# Each dictionary becomes a row; the row labels are supplied explicitly.
store_labels = ['store 1', 'store 2']
store_items = pd.DataFrame(items2, index=store_labels)

# Show the result.
store_items

Unnamed: 0,bikes,glasses,pants,watches
store 1,20,,30,35
store 2,15,50.0,5,10


In [29]:
print(store_items[['bikes']], '\n')           # one column, as a DataFrame
print(store_items[['bikes','pants']], '\n')   # several columns
print(store_items.loc[['store 1']], '\n')     # one row, selected by label
print(store_items['bikes']['store 2'], '\n')  # single value: column first, then row

         bikes
store 1     20
store 2     15 

         bikes  pants
store 1     20     30
store 2     15      5 

         bikes  glasses  pants  watches
store 1     20      NaN     30       35 

15 



In [30]:
# Add a new column; the list supplies one value per row.
store_items['shirts']=[15,2]
store_items

Unnamed: 0,bikes,glasses,pants,watches,shirts
store 1,20,,30,35,15
store 2,15,50.0,5,10,2


In [31]:
# Derive a new column as the row-by-row sum of two existing columns.
store_items['suits'] = store_items['pants'] + store_items['shirts']
store_items

Unnamed: 0,bikes,glasses,pants,watches,shirts,suits
store 1,20,,30,35,15,45
store 2,15,50.0,5,10,2,7


In [32]:
# A one-row DataFrame describing a third store.
store_3 = {'bikes': 20, 'pants': 30, 'watches': 35, 'glasses': 4}
new_items = [store_3]
new_store = pd.DataFrame(new_items, index=['store 3'])
new_store

Unnamed: 0,bikes,glasses,pants,watches
store 3,20,4,30,35


In [33]:
# Add the new store as an extra row. `pd.concat` is used instead of
# `DataFrame.append`, which is deprecated and no longer available in pandas 2.
# `sort=True` keeps the columns in alphabetical order, which the later
# positional `insert(4, ...)` call relies on.
store_items = pd.concat([store_items, new_store], sort=True)
store_items

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Unnamed: 0,bikes,glasses,pants,shirts,suits,watches
store 1,20,,30,15.0,45.0,35
store 2,15,50.0,5,2.0,7.0,10
store 3,20,4.0,30,,,35


In [34]:
# Assign a slice that skips the first row. Values are matched on row labels,
# so 'store 1' has no matching entry and gets NaN in the new column.
store_items['new watches'] = store_items['watches'][1:]
store_items

Unnamed: 0,bikes,glasses,pants,shirts,suits,watches,new watches
store 1,20,,30,15.0,45.0,35,
store 2,15,50.0,5,2.0,7.0,10,10.0
store 3,20,4.0,30,,,35,35.0


In [35]:
# Insert 'shoes' as the column at position 4 (counting from 0).
store_items.insert(4,'shoes',[8,5,0])
store_items

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches,new watches
store 1,20,,30,15.0,8,45.0,35,
store 2,15,50.0,5,2.0,5,7.0,10,10.0
store 3,20,4.0,30,,0,,35,35.0


In [36]:
# pop removes the named column from store_items in place.
store_items.pop('new watches')
store_items

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,20,,30,15.0,8,45.0,35
store 2,15,50.0,5,2.0,5,7.0,10
store 3,20,4.0,30,,0,,35


In [37]:
# axis=1 drops columns; the result is assigned back to store_items.
store_items = store_items.drop(['watches','shoes'], axis=1)
store_items

Unnamed: 0,bikes,glasses,pants,shirts,suits
store 1,20,,30,15.0,45.0
store 2,15,50.0,5,2.0,7.0
store 3,20,4.0,30,,


In [38]:
# With no axis given, drop removes rows by label.
store_items = store_items.drop(['store 2'])
store_items

Unnamed: 0,bikes,glasses,pants,shirts,suits
store 1,20,,30,15.0,45.0
store 3,20,4.0,30,,


In [39]:
# Rename a column using an {old: new} mapping.
store_items = store_items.rename(columns={'bikes':'hats'})
store_items

Unnamed: 0,hats,glasses,pants,shirts,suits
store 1,20,,30,15.0,45.0
store 3,20,4.0,30,,


In [40]:
# Rename a row label using an {old: new} mapping.
store_items = store_items.rename(index={'store 3':'last store'})
store_items

Unnamed: 0,hats,glasses,pants,shirts,suits
store 1,20,,30,15.0,45.0
last store,20,4.0,30,,


In [41]:
# Use the 'pants' column as the row index. Index values need not be unique:
# both rows end up labelled 30.
store_items = store_items.set_index('pants')
store_items

Unnamed: 0_level_0,hats,glasses,shirts,suits
pants,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
30,20,,15.0,45.0
30,20,4.0,,


In [42]:
# One dictionary per store; keys missing from a store become NaN in the frame.
store_1 = {'bikes': 20, 'pants': 30, 'watches': 35, 'shirts': 15, 'shoes': 8, 'suits': 45}
store_2 = {'watches': 10, 'glasses': 50, 'bikes': 15, 'pants': 5, 'shirts': 2, 'shoes': 5, 'suits': 7}
store_3 = {'bikes': 20, 'pants': 30, 'watches': 35, 'glasses': 4, 'shoes': 10}
items2 = [store_1, store_2, store_3]

# Each dictionary becomes a row; the row labels are supplied explicitly.
store_labels = ['store 1', 'store 2', 'store 3']
store_items = pd.DataFrame(items2, index=store_labels)
store_items

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,20,,30,15.0,8,45.0,35
store 2,15,50.0,5,2.0,5,7.0,10
store 3,20,4.0,30,,10,,35


In [43]:
# isnull() marks missing entries; the first sum() counts them per column and
# the second sum() adds those counts into a single total.
x = store_items.isnull().sum().sum()
print('NaN values: ',x)

NaN values:  3


In [44]:
# Boolean mask: True where a value is missing.
store_items.isnull()

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,False,True,False,False,False,False,False
store 2,False,False,False,False,False,False,False
store 3,False,False,False,True,False,True,False


In [45]:
# Number of missing values in each column.
store_items.isnull().sum()

bikes      0
glasses    1
pants      0
shirts     1
shoes      0
suits      1
watches    0
dtype: int64

In [46]:
# Number of non-missing values in each column.
store_items.count()

bikes      3
glasses    2
pants      3
shirts     2
shoes      3
suits      2
watches    3
dtype: int64

In [47]:
store_items.dropna(axis = 0) # Returns a copy without the rows that contain NaN; store_items is unchanged

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 2,15,50.0,5,2.0,5,7.0,10


In [48]:
store_items.dropna(axis = 1) # Returns a copy without the columns that contain NaN; store_items is unchanged

Unnamed: 0,bikes,pants,shoes,watches
store 1,20,30,8,35
store 2,15,5,5,10
store 3,20,30,10,35


In [49]:
# Replace every NaN with 0 in the returned DataFrame.
store_items.fillna(0)

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,20,0.0,30,15.0,8,45.0,35
store 2,15,50.0,5,2.0,5,7.0,10
store 3,20,4.0,30,0.0,10,0.0,35


In [50]:
# Forward fill down each column: a NaN takes the value from the row above.
# A NaN in the first row has nothing above it and stays NaN.
# `.ffill()` is the dedicated method; `fillna(method='ffill')` is deprecated.
store_items.ffill(axis = 0)

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,20,,30,15.0,8,45.0,35
store 2,15,50.0,5,2.0,5,7.0,10
store 3,20,4.0,30,2.0,10,7.0,35


In [51]:
# Forward fill along each row: a NaN takes the value from the column to its left.
# `.ffill()` is the dedicated method; `fillna(method='ffill')` is deprecated.
store_items.ffill(axis = 1)

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,20.0,20.0,30.0,15.0,8.0,45.0,35.0
store 2,15.0,50.0,5.0,2.0,5.0,7.0,10.0
store 3,20.0,4.0,30.0,30.0,10.0,10.0,35.0


In [52]:
# Backward fill up each column: a NaN takes the value from the row below.
# A NaN in the last row has nothing below it and stays NaN.
# `.bfill()` is the dedicated method; `fillna(method='backfill')` is deprecated.
store_items.bfill(axis = 0)

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,20,50.0,30,15.0,8,45.0,35
store 2,15,50.0,5,2.0,5,7.0,10
store 3,20,4.0,30,,10,,35


In [53]:
# In the saved output the leading NaN ('glasses' for store 1) is left unfilled,
# while the trailing NaNs take the last valid value above them.
store_items.interpolate(method = 'linear', axis = 0) # Fills using linear interpolation in column

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,20,,30,15.0,8,45.0,35
store 2,15,50.0,5,2.0,5,7.0,10
store 3,20,4.0,30,2.0,10,7.0,35


In [54]:
# Load the stock data from a CSV file (the path is relative).
# NOTE(review): the saved output for this cell is a ParserError ("Expected 1
# fields in line 75, saw 2"), so the file did not parse as CSV and the cells
# below were never run — check the contents of CSV/GOOG.csv. TODO confirm.
Google_stock = pd.read_csv('CSV/GOOG.csv')
print('Type: ', type(Google_stock))
print('Shape: ', Google_stock.shape)

ParserError: Error tokenizing data. C error: Expected 1 fields in line 75, saw 2


In [None]:
# Display the loaded DataFrame.
Google_stock

In [None]:
# First rows of the DataFrame.
Google_stock.head()

In [None]:
# Last rows of the DataFrame.
Google_stock.tail()

In [None]:
# Per column: True if the column contains at least one missing value.
Google_stock.isnull().any()

In [None]:
# Descriptive statistics for the DataFrame.
Google_stock.describe()

In [None]:
# Descriptive statistics for a single column.
# Assumes the CSV has an 'Adj Close' column — TODO confirm once the file loads.
Google_stock['Adj Close'].describe()

In [None]:
# Maximum value of each column.
Google_stock.max()

In [None]:
# Minimum value of each column.
Google_stock.min()

In [None]:
# Mean of a single column.
# Assumes the CSV has a 'Close' column — TODO confirm once the file loads.
Google_stock['Close'].mean()

In [None]:
# Maximum of a single column.
# Assumes the CSV has a 'Volume' column — TODO confirm once the file loads.
Google_stock['Volume'].max()

In [None]:
# NOTE(review): if the CSV has non-numeric columns (e.g. a date stored as text),
# recent pandas versions may need those columns excluded before calling corr()
# — confirm once the file loads.
Google_stock.corr() # Correlation between columns

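Common `groupby` aggregation templates (replace `data` and the column names with your own DataFrame and columns):

    data.groupby(['Column 1'])['Column 2'].sum()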
    data.groupby(['Column 1'])['Column 2'].mean()
    data.groupby(['Column 1', 'Column 3'])['Column 2'].sum()