### Pandas Data Structures

#### Importing modules

In [9]:
import pandas as pd

#### Series: one-dimensional arrays / vectors

In [10]:
# A Series built from a plain list gets the default integer index (0, 1, 2, ...).
votingResults = pd.Series(data=[2300, 2000, 1000, 750])

In [11]:
votingResults

0    2300
1    2000
2    1000
3     750
dtype: int64

In [12]:
# Label each vote count with its candidate by passing the names as the index.
votingResults = pd.Series(
    data=[2300, 2000, 1000, 750],
    index=['Candidate A', 'Candidate B', 'Candidate C', 'Candidate D'],
)

In [13]:
votingResults

Candidate A    2300
Candidate B    2000
Candidate C    1000
Candidate D     750
dtype: int64

In [7]:
# Exercise: how would we do the following?
#   - View each candidate's result
#   - Select the candidates with more than x votes


#### Dictionaries

In [14]:
# Vote counts keyed by candidate name; a later cell converts this dict into a Series.
voting_dict = {
    'Candidate A': 2300,
    'Candidate B': 2000,
    'Candidate C': 1000,
    'Candidate D': 750,
}

In [15]:
voting_dict 

{'Candidate A': 2300,
 'Candidate B': 2000,
 'Candidate C': 1000,
 'Candidate D': 750}

In [16]:
# Dict keys become the index labels and dict values become the Series data.
voting = pd.Series(data=voting_dict)

In [17]:
voting

Candidate A    2300
Candidate B    2000
Candidate C    1000
Candidate D     750
dtype: int64

### Create a dataframe

In [18]:
# Column-oriented records: each key is a column name and each list holds
# that column's values, one entry per candidate.
data = dict(
    candidates=['Candidate A', 'Candidate B', 'Candidate C', 'Candidate D'],
    votes=[2300, 2000, 1000, 750],
    party=['Labor', 'UKIP', 'Torries', 'Green'],
)

In [19]:
data

{'candidates': ['Candidate A', 'Candidate B', 'Candidate C', 'Candidate D'],
 'party': ['Labor', 'UKIP', 'Torries', 'Green'],
 'votes': [2300, 2000, 1000, 750]}

In [20]:
# Build a DataFrame from the dict: keys become column labels and the
# list entries become the rows.
df = pd.DataFrame(data=data)

In [21]:
df

Unnamed: 0,candidates,party,votes
0,Candidate A,Labor,2300
1,Candidate B,UKIP,2000
2,Candidate C,Torries,1000
3,Candidate D,Green,750


In [22]:
# Rebuild the DataFrame, this time fixing the column order explicitly
# via the `columns` argument.
df = pd.DataFrame(data=data, columns=['party', 'votes', 'candidates'])

In [23]:
df

Unnamed: 0,party,votes,candidates
0,Labor,2300,Candidate A
1,UKIP,2000,Candidate B
2,Torries,1000,Candidate C
3,Green,750,Candidate D


In [24]:
# Add an 'Age' column. The new Series carries the default 0..3 index, so its
# values line up row-for-row with df's own index.
df['Age'] = pd.Series(data=[45, 65, 78, 74])

In [26]:
df

Unnamed: 0,party,votes,candidates,Age
0,Labor,2300,Candidate A,45
1,UKIP,2000,Candidate B,65
2,Torries,1000,Candidate C,78
3,Green,750,Candidate D,74


In [27]:
# Remove the 'Age' column again; `df` is rebound to the frame without it.
df = df.drop(columns=['Age'])

In [28]:
df

Unnamed: 0,party,votes,candidates
0,Labor,2300,Candidate A
1,UKIP,2000,Candidate B
2,Torries,1000,Candidate C
3,Green,750,Candidate D


In [29]:
# Transposed view: rows become columns and vice versa. Nothing is assigned,
# so df itself is unchanged.
df.transpose()

Unnamed: 0,0,1,2,3
party,Labor,UKIP,Torries,Green
votes,2300,2000,1000,750
candidates,Candidate A,Candidate B,Candidate C,Candidate D


In [30]:
df

Unnamed: 0,party,votes,candidates
0,Labor,2300,Candidate A
1,UKIP,2000,Candidate B
2,Torries,1000,Candidate C
3,Green,750,Candidate D


In [35]:
def goodBad(x, threshold=1500):
    """Classify a vote count as 'Good' or 'Bad'.

    Parameters
    ----------
    x : number
        Vote count to classify.
    threshold : number, optional
        Minimum count (inclusive) that qualifies as 'Good'. Defaults to 1500,
        the cut-off that used to be hard-coded, so existing single-argument
        calls (including ``Series.apply(goodBad)``) behave as before.

    Returns
    -------
    str
        'Good' when ``x >= threshold``, otherwise 'Bad'.
    """
    return 'Good' if x >= threshold else 'Bad'

In [36]:
goodBad(1500)

'Good'

In [39]:
# Label every row by running goodBad on each vote count, one element at a time.
df['Quality'] = df['votes'].map(goodBad)

In [40]:
df

Unnamed: 0,party,votes,candidates,Quality
0,Labor,2300,Candidate A,Good
1,UKIP,2000,Candidate B,Good
2,Torries,1000,Candidate C,Bad
3,Green,750,Candidate D,Bad


In [41]:
data

{'candidates': ['Candidate A', 'Candidate B', 'Candidate C', 'Candidate D'],
 'party': ['Labor', 'UKIP', 'Torries', 'Green'],
 'votes': [2300, 2000, 1000, 750]}

In [42]:
import numpy as np

In [43]:
# Vectorised version of the same labelling: np.where picks 'Good' where the
# condition holds and 'Bad' elsewhere, with no Python-level function call per row.
df['Quality2'] = np.where(df['votes'].ge(1500), 'Good', 'Bad')

In [45]:
df

Unnamed: 0,party,votes,candidates,Quality,Quality2
0,Labor,2300,Candidate A,Good,Good
1,UKIP,2000,Candidate B,Good,Good
2,Torries,1000,Candidate C,Bad,Bad
3,Green,750,Candidate D,Bad,Bad


In [47]:
# Flag rows that exactly repeat an earlier row (the first occurrence is not flagged).
df.duplicated(keep='first')

0    False
1    False
2    False
3    False
dtype: bool

In [50]:
# Keep every row whose party is not UKIP. df is left untouched and the
# surviving rows keep their original index labels.
df2 = df.loc[df['party'] != 'UKIP']

In [51]:
df

Unnamed: 0,party,votes,candidates,Quality,Quality2
0,Labor,2300,Candidate A,Good,Good
1,UKIP,2000,Candidate B,Good,Good
2,Torries,1000,Candidate C,Bad,Bad
3,Green,750,Candidate D,Bad,Bad


In [52]:
df2

Unnamed: 0,party,votes,candidates,Quality,Quality2
0,Labor,2300,Candidate A,Good,Good
2,Torries,1000,Candidate C,Bad,Bad
3,Green,750,Candidate D,Bad,Bad


In [53]:
# Preview only the first two rows.
df.head(n=2)

Unnamed: 0,party,votes,candidates,Quality,Quality2
0,Labor,2300,Candidate A,Good,Good
1,UKIP,2000,Candidate B,Good,Good


In [54]:
# head() shows up to five rows by default; df has only four, so all of them appear.
df.head(n=5)

Unnamed: 0,party,votes,candidates,Quality,Quality2
0,Labor,2300,Candidate A,Good,Good
1,UKIP,2000,Candidate B,Good,Good
2,Torries,1000,Candidate C,Bad,Bad
3,Green,750,Candidate D,Bad,Bad


In [55]:
# Summary statistics (count, mean, std, min, quartiles, max). Only the numeric
# 'votes' column appears in the result; the text columns are left out.
df.describe()

Unnamed: 0,votes
count,4.0
mean,1512.5
std,753.18767
min,750.0
25%,937.5
50%,1500.0
75%,2075.0
max,2300.0


In [56]:
# NOTE: df[1] looks for a *column* labelled 1, not the row at position 1, which is
# why this cell raises KeyError. To get a row, use df.iloc[1] (by position) or
# df.loc[1] (by index label) instead.
df[1]

KeyError: 1

In [58]:
# Every row except the last one, selected by position.
df.iloc[:-1]

Unnamed: 0,party,votes,candidates,Quality,Quality2
0,Labor,2300,Candidate A,Good,Good
1,UKIP,2000,Candidate B,Good,Good
2,Torries,1000,Candidate C,Bad,Bad


In [61]:
# Select a subset of columns by passing a list of labels; the result is a DataFrame.
df.loc[:, ['party', 'Quality']]

Unnamed: 0,party,Quality
0,Labor,Good
1,UKIP,Good
2,Torries,Bad
3,Green,Bad


In [65]:
# Highest vote count. The builtin max() iterates over the Series one element at a
# time; df['votes'].max() is the vectorised pandas equivalent.
max(df['votes'])

2300