In [45]:
import pandas as pd

cols = ['Date', 'Time', 'Depth', 'Magnitude Type', 'Type', 'Magnitude']
df = pd.read_csv(f'../data/earthquakes_1965_2016_database.csv.zip')[cols]

df

Unnamed: 0,Date,Time,Depth,Magnitude Type,Type,Magnitude
0,01/02/1965,13:44:18,131.60,MW,Earthquake,6.0
1,01/04/1965,11:29:49,80.00,MW,Earthquake,5.8
2,01/05/1965,18:05:58,20.00,MW,Earthquake,6.2
3,01/08/1965,18:49:43,15.00,MW,Earthquake,5.8
4,01/09/1965,13:32:50,15.00,MW,Earthquake,5.8
...,...,...,...,...,...,...
23407,12/28/2016,08:22:12,12.30,ML,Earthquake,5.6
23408,12/28/2016,09:13:47,8.80,ML,Earthquake,5.5
23409,12/28/2016,12:38:51,10.00,MWW,Earthquake,5.9
23410,12/29/2016,22:30:19,79.00,MWW,Earthquake,6.3


## 1: Combine multiple columns using string concatenation

In [8]:
df['Magnitude Type'] + ', ' + df['Type']

0         MW, Earthquake
1         MW, Earthquake
2         MW, Earthquake
3         MW, Earthquake
4         MW, Earthquake
              ...       
23407     ML, Earthquake
23408     ML, Earthquake
23409    MWW, Earthquake
23410    MWW, Earthquake
23411     MB, Earthquake
Length: 23412, dtype: object

In [10]:
# TypeError: can only concatenate str (not "float") to str
# df['Magnitude Type'] + ', ' + df['Magnitude']

In [12]:
df['Magnitude Type'] + ', ' + df['Magnitude'].astype(str)

0         MW, 6.0
1         MW, 5.8
2         MW, 6.2
3         MW, 5.8
4         MW, 5.8
           ...   
23407     ML, 5.6
23408     ML, 5.5
23409    MWW, 5.9
23410    MWW, 6.3
23411     MB, 5.5
Length: 23412, dtype: object

## 2: Combine date and time columns into DateTime column

In [49]:
df['Date'] + ' ' + df['Time']

0        01/02/1965 13:44:18
1        01/04/1965 11:29:49
2        01/05/1965 18:05:58
3        01/08/1965 18:49:43
4        01/09/1965 13:32:50
                ...         
23407    12/28/2016 08:22:12
23408    12/28/2016 09:13:47
23409    12/28/2016 12:38:51
23410    12/29/2016 22:30:19
23411    12/30/2016 20:08:28
Length: 23412, dtype: object

In [50]:
pd.to_datetime(df['Date'] + ' ' + df['Time'], errors='ignore')

0        01/02/1965 13:44:18
1        01/04/1965 11:29:49
2        01/05/1965 18:05:58
3        01/08/1965 18:49:43
4        01/09/1965 13:32:50
                ...         
23407    12/28/2016 08:22:12
23408    12/28/2016 09:13:47
23409    12/28/2016 12:38:51
23410    12/29/2016 22:30:19
23411    12/30/2016 20:08:28
Length: 23412, dtype: object

## 3: Combine multiple columns with agg and join

In [51]:
df[['Date', 'Time']].T.agg(','.join)

0        01/02/1965,13:44:18
1        01/04/1965,11:29:49
2        01/05/1965,18:05:58
3        01/08/1965,18:49:43
4        01/09/1965,13:32:50
                ...         
23407    12/28/2016,08:22:12
23408    12/28/2016,09:13:47
23409    12/28/2016,12:38:51
23410    12/29/2016,22:30:19
23411    12/30/2016,20:08:28
Length: 23412, dtype: object

## 4: Combine multiple columns with lambda and join

In [42]:
df[['Date', 'Time']].agg(lambda x: ','.join(x.values), axis=1).T

0        01/02/1965,13:44:18
1        01/04/1965,11:29:49
2        01/05/1965,18:05:58
3        01/08/1965,18:49:43
4        01/09/1965,13:32:50
                ...         
23407    12/28/2016,08:22:12
23408    12/28/2016,09:13:47
23409    12/28/2016,12:38:51
23410    12/29/2016,22:30:19
23411    12/30/2016,20:08:28
Length: 23412, dtype: object

In [56]:
df.columns = ['Date', 'Date', 'Depth', 'Magnitude Type', 'Type', 'Magnitude']
df

Unnamed: 0,Date,Date.1,Depth,Magnitude Type,Type,Magnitude
0,01/02/1965,13:44:18,131.60,MW,Earthquake,6.0
1,01/04/1965,11:29:49,80.00,MW,Earthquake,5.8
2,01/05/1965,18:05:58,20.00,MW,Earthquake,6.2
3,01/08/1965,18:49:43,15.00,MW,Earthquake,5.8
4,01/09/1965,13:32:50,15.00,MW,Earthquake,5.8
...,...,...,...,...,...,...
23407,12/28/2016,08:22:12,12.30,ML,Earthquake,5.6
23408,12/28/2016,09:13:47,8.80,ML,Earthquake,5.5
23409,12/28/2016,12:38:51,10.00,MWW,Earthquake,5.9
23410,12/29/2016,22:30:19,79.00,MWW,Earthquake,6.3


In [104]:
df.groupby(df.columns, axis=1).agg(lambda x: x.apply(lambda y: ','.join([str(l) for l in y if str(l) != "nan"]), axis=1))

Unnamed: 0,Date,Depth,Magnitude,Magnitude Type,Type
0,"01/02/1965,13:44:18",131.6,6.0,MW,Earthquake
1,"01/04/1965,11:29:49",80.0,5.8,MW,Earthquake
2,"01/05/1965,18:05:58",20.0,6.2,MW,Earthquake
3,"01/08/1965,18:49:43",15.0,5.8,MW,Earthquake
4,"01/09/1965,13:32:50",15.0,5.8,MW,Earthquake
...,...,...,...,...,...
23407,"12/28/2016,08:22:12",12.3,5.6,ML,Earthquake
23408,"12/28/2016,09:13:47",8.8,5.5,ML,Earthquake
23409,"12/28/2016,12:38:51",10.0,5.9,MWW,Earthquake
23410,"12/29/2016,22:30:19",79.0,6.3,MWW,Earthquake


In [108]:
for i in df.groupby(df.columns, axis=1):
    print(i)

('Date',              Date      Date
0      01/02/1965  13:44:18
1      01/04/1965  11:29:49
2      01/05/1965  18:05:58
3      01/08/1965  18:49:43
4      01/09/1965  13:32:50
...           ...       ...
23407  12/28/2016  08:22:12
23408  12/28/2016  09:13:47
23409  12/28/2016  12:38:51
23410  12/29/2016  22:30:19
23411  12/30/2016  20:08:28

[23412 rows x 2 columns])
('Depth',         Depth
0      131.60
1       80.00
2       20.00
3       15.00
4       15.00
...       ...
23407   12.30
23408    8.80
23409   10.00
23410   79.00
23411   11.94

[23412 rows x 1 columns])
('Magnitude',        Magnitude
0            6.0
1            5.8
2            6.2
3            5.8
4            5.8
...          ...
23407        5.6
23408        5.5
23409        5.9
23410        6.3
23411        5.5

[23412 rows x 1 columns])
('Magnitude Type',       Magnitude Type
0                 MW
1                 MW
2                 MW
3                 MW
4                 MW
...              ...
23407       

In [110]:
df.groupby(df.columns, axis=1).apply(lambda x: x.values)

Date              [[01/02/1965, 13:44:18], [01/04/1965, 11:29:49...
Depth             [[131.6], [80.0], [20.0], [15.0], [15.0], [35....
Magnitude         [[6.0], [5.8], [6.2], [5.8], [5.8], [6.7], [5....
Magnitude Type    [[MW], [MW], [MW], [MW], [MW], [MW], [MW], [MW...
Type              [[Earthquake], [Earthquake], [Earthquake], [Ea...
dtype: object