In [2]:
import numpy as np
import pandas as pd
import itertools

In [100]:
df = pd.DataFrame({'col1': [1, 2, 3]})
df

Unnamed: 0,col1
0,1
1,2
2,3


### The column of a dataframe is a Pandas Series:

In [101]:
print(f'Type of column of dataframe: {type(df.iloc[0])}')

Type of column of dataframe: <class 'pandas.core.series.Series'>


In [102]:
print(f'Type of column of dataframe: {type(df["col1"])}')

Type of column of dataframe: <class 'pandas.core.series.Series'>


### Pandas Series can be converted to a Python list using to_list() or list(df['col1']):

In [103]:
df['col1']

0    1
1    2
2    3
Name: col1, dtype: int64

In [104]:
df['col1'].to_list()

[1, 2, 3]

In [105]:
print(f'Type after to_list() conversion: {type(df["col1"].to_list())}')

Type after to_list() conversion: <class 'list'>


In [106]:
list(df['col1'])

[1, 2, 3]

In [107]:
print(f'Type after list(df["col1"]) conversion: {type(list(df["col1"]))}')

Type after list(df["col1"]) conversion: <class 'list'>


### When a single-column dataframe is squeezed, you get a Pandas Series:

In [108]:
print(type(df.squeeze()))

<class 'pandas.core.series.Series'>


In [109]:
df.squeeze()

0    1
1    2
2    3
Name: col1, dtype: int64

### You cannot squeeze a multi-column dataframe into a series:

In [110]:
df2 = pd.DataFrame({'col1': [1, 2, 3], 'col2': [4, 5, 6]})
df2

Unnamed: 0,col1,col2
0,1,4
1,2,5
2,3,6


In [111]:
print(type(df2.squeeze()))

<class 'pandas.core.frame.DataFrame'>


### Dataframe.to_numpy() gives Numpy array (available since Pandas v0.24):

In [112]:
df2.to_numpy()

array([[1, 4],
       [2, 5],
       [3, 6]])

### ... but the dtype becomes the one which can fit all values.
### If there are objects, dtype becomes object for all values:

In [113]:
df2['col3'] = pd.date_range(2000, periods=3)
df2

Unnamed: 0,col1,col2,col3
0,1,4,1970-01-01 00:00:00.000002
1,2,5,1970-01-02 00:00:00.000002
2,3,6,1970-01-03 00:00:00.000002


In [114]:
df2.to_numpy()

array([[1, 4, Timestamp('1970-01-01 00:00:00.000002')],
       [2, 5, Timestamp('1970-01-02 00:00:00.000002')],
       [3, 6, Timestamp('1970-01-03 00:00:00.000002')]], dtype=object)

### Creating numpy arrays with keys and values using records:

In [7]:
records = np.rec.fromarrays((np.array(['a', 'b', 'c']),
                             np.arange(3),
                             np.arange(4,7)),
                            names=['keys', 'values', 'more values'],
                            formats=['S1', 'f4', 'i4'])  # S/a = string, U = unicode, 
                                                         # i = signed integer, u = unsigned integer,
                                                         # f = floating point,
                                                         # c = complex number,
                                                         # b = boolean,
                                                         # M = datetime, m = timedelta
                                                         # O = python object
                                                         # V = raw (void data type in C)
records

rec.array([(b'a', 0., 4), (b'b', 1., 5), (b'c', 2., 6)],
          dtype=[('keys', 'S1'), ('values', '<f4'), ('more values', '<i4')])

In [8]:
records['keys']

array([b'a', b'b', b'c'], dtype='|S1')

In [9]:
records['values']

array([0., 1., 2.], dtype=float32)

In [10]:
records['more values']

array([4, 5, 6], dtype=int32)

In [11]:
pd.DataFrame(records)

Unnamed: 0,keys,values,more values
0,b'a',0.0,4
1,b'b',1.0,5
2,b'c',2.0,6
