In [27]:
import pandas as pd
from scipy.sparse import csr_matrix
import numpy as np

### Pandas

In [29]:
df = pd.DataFrame([['M', 'London', 'Billy Bob', '01-02-1990', 60], 
                   ['F', 'London', 'Carol Chen', '13-06-1987', 45], 
                   ['M', 'London', 'Danny Dyer', '13-09-1989', 80],
                   ['M', 'Edinburgh', 'Ewan Eton', '13-09-1989', 70]], columns=['gender', 'city', 'name', 'dob', 'weight'])


In [13]:
df

Unnamed: 0,gender,name,dob,weight
0,M,Billy Bob,01-02-1990,60
1,F,Carol Chen,13-06-1987,45
2,M,Danny Dyer,13-09-1989,80


In [14]:
# Get the second row
df.iloc[1]

gender             F
name      Carol Chen
dob       13-06-1987
weight            45
Name: 1, dtype: object

In [16]:
# Select an item from this row
df.iloc[1]['name']

'Carol Chen'

In [17]:
# Select a column
df['name'] # or df.name

0     Billy Bob
1    Carol Chen
2    Danny Dyer
Name: name, dtype: object

In [19]:
# Convert to numpy array
df.values

array([['M', 'Billy Bob', '01-02-1990', 60],
       ['F', 'Carol Chen', '13-06-1987', 45],
       ['M', 'Danny Dyer', '13-09-1989', 80]], dtype=object)

In [24]:
# Groupby gender and get the mean weight
df.groupby('gender')['weight'].mean().reset_index()

Unnamed: 0,gender,weight
0,F,45.0
1,M,73.333333


In [28]:
# Groupby gender and get the mean weight, the first name and the min dob
df.groupby('gender').agg({'weight': np.mean,
                         'name': 'first',
                         'dob': 'min'}).reset_index()

Unnamed: 0,gender,weight,name,dob
0,F,45.0,Carol Chen,13-06-1987
1,M,73.333333,Billy Bob,01-02-1990


In [30]:
# Groupby gender AND city and get the mean weight
df.groupby(['gender', 'city'])['weight'].mean().reset_index()

Unnamed: 0,gender,city,weight
0,F,London,45
1,M,Edinburgh,70
2,M,London,70


In [35]:
# Loop over each row in a pandas dataframe
for index, row in df.iterrows():
    print('This row has the index: %d' % index)
    # Select a column from the row
    print(row['name'])
    
    # Note that I have used "reset_index" to change the pandas series back into a pandas dataframe
    display(row.reset_index())

This row has the index: 0
Billy Bob


Unnamed: 0,index,0
0,gender,M
1,city,London
2,name,Billy Bob
3,dob,01-02-1990
4,weight,60


This row has the index: 1
Carol Chen


Unnamed: 0,index,1
0,gender,F
1,city,London
2,name,Carol Chen
3,dob,13-06-1987
4,weight,45


This row has the index: 2
Danny Dyer


Unnamed: 0,index,2
0,gender,M
1,city,London
2,name,Danny Dyer
3,dob,13-09-1989
4,weight,80


This row has the index: 3
Ewan Eton


Unnamed: 0,index,3
0,gender,M
1,city,Edinburgh
2,name,Ewan Eton
3,dob,13-09-1989
4,weight,70


### Sparse

In [3]:
mat = csr_matrix([[1, 0, 0, 0], 
                  [0, 2, 0, 0],
                  [0, 3, 0, 0],
                  [0, 0, 0, 4]])


In [4]:
# Print the values
print(mat)

  (0, 0)	1
  (1, 1)	2
  (2, 1)	3
  (3, 3)	4


In [5]:
# Get the indices of all non zero elements
# Note how these the same numbers as are in the brackets above.
mat.nonzero()

(array([0, 1, 2, 3], dtype=int32), array([0, 1, 1, 3], dtype=int32))

In [6]:
# Get the value at these indices in the matrix
mat[2, 1]

3

In [7]:
# Update a value in the matrix
mat[0, 3] = 5

  self._set_intXint(row, col, x.flat[0])


In [8]:
print(mat)

  (0, 0)	1
  (0, 3)	5
  (1, 1)	2
  (2, 1)	3
  (3, 3)	4


In [9]:
# Look at the matrix as a numpy
mat.todense()

matrix([[1, 0, 0, 5],
        [0, 2, 0, 0],
        [0, 3, 0, 0],
        [0, 0, 0, 4]], dtype=int64)