In [1]:
cd /Users/scottclay/RESEARCH/lgalaxies/Lgalaxies_Analysis/read_data_pandas_hdf5/

/Users/scottclay/RESEARCH/lgalaxies/Lgalaxies_Analysis/read_data_pandas_hdf5


In [2]:
import sys

datadir = '../../Hen15_Dustmodel/output/'

sys.path.insert(0,datadir)

# Template structure for L-Galaxies data
#import snap_template   # structure temple for data
#import read_lgal       # function to read in data


In [3]:
# numpy dtype for LGAL_GAL_STRUCT
import numpy as np
struct_dtype = np.dtype([
('Type',np.int32,1),
('HaloIndex',np.int32,1),
('SnapNum',np.int32,1),
('LookBackTimeToSnap',np.float32,1),
('CentralMvir',np.float32,1),
('CentralRvir',np.float32,1),
('DistanceToCentralGal',np.float32,3),
('Pos',np.float32,3),
('Vel',np.float32,3),
('Len',np.int32,1),
('Mvir',np.float32,1),
('Rvir',np.float32,1),
('Vvir',np.float32,1),
('Vmax',np.float32,1),
('GasSpin',np.float32,3),
('StellarSpin',np.float32,3),
('InfallVmax',np.float32,1),
('InfallVmaxPeak',np.float32,1),
('InfallSnap',np.int32,1),
('InfallHotGas',np.float32,1),
('HotRadius',np.float32,1),
('OriMergTime',np.float32,1),
('MergTime',np.float32,1),
('ColdGas',np.float32,1),
('StellarMass',np.float32,1),
('BulgeMass',np.float32,1),
('DiskMass',np.float32,1),
('HotGas',np.float32,1),
('EjectedMass',np.float32,1),
('BlackHoleMass',np.float32,1),
('ICM',np.float32,1),
('MetalsColdGas',np.float32,3),
('MetalsBulgeMass',np.float32,3),
('MetalsDiskMass',np.float32,3),
('MetalsHotGas',np.float32,3),
('MetalsEjectedMass',np.float32,3),
('MetalsICM',np.float32,3),
('PrimordialAccretionRate',np.float32,1),
('CoolingRadius',np.float32,1),
('CoolingRate',np.float32,1),
('CoolingRate_beforeAGN',np.float32,1),
('QuasarAccretionRate',np.float32,1),
('RadioAccretionRate',np.float32,1),
('Sfr',np.float32,1),
('SfrBulge',np.float32,1),
('XrayLum',np.float32,1),
('BulgeSize',np.float32,1),
('StellarDiskRadius',np.float32,1),
('GasDiskRadius',np.float32,1),
('CosInclination',np.float32,1),
('DisruptOn',np.int32,1),
('MergeOn',np.int32,1),
('MagDust',np.float32,2),
('Mag',np.float32,2),
('MagBulge',np.float32,2),
('MassWeightAge',np.float32,1),
('DiskMass_elements',np.float32,11),
('BulgeMass_elements',np.float32,11),
('ColdGas_elements',np.float32,11),
('HotGas_elements',np.float32,11),
('ICM_elements',np.float32,11),
('EjectedMass_elements',np.float32,11),
('DustRatesISM',np.float32,5),
('Dust_elements',np.float32,11),
('Attenuation_Dust',np.float32,1)
])

properties_used = {}
for ii in struct_dtype.names:
    properties_used[ii] = False


In [4]:
def read_snap(folder,file_prefix,firstfile,lastfile,props,template):
    nTrees = 0
    nHalos = 0
    nTreeHalos = np.array([],dtype=np.int32)
    filter_list = []
    for prop in props:
        if props[prop]:
            filter_list.append((prop,template[prop]))
    filter_dtype = np.dtype(filter_list)
    gals = np.array([],dtype=filter_dtype)
    for ifile in range(firstfile,lastfile+1):
        filename = folder+'/'+file_prefix+"_"+"%d"%(ifile)
        f = open(filename,"rb")
        this_nTrees =  np.fromfile(f,np.int32,1)
        nTrees += this_nTrees
        this_nHalos = np.fromfile(f,np.int32,1)
        nHalos += this_nHalos
        print ("File ", ifile," nGals = ",this_nHalos)
        addednTreeHalos = np.fromfile(f,np.int32,this_nTrees)
        nTreeHalos = np.append(nTreeHalos,addednTreeHalos)
        this_addedGalaxy = np.fromfile(f,template,this_nHalos) # all properties
        addedGalaxy = np.zeros(this_nHalos,dtype=filter_dtype) # selected props
        for prop in template.names:
            if props[prop]:
                addedGalaxy[prop] = this_addedGalaxy[prop]
        gals = np.append(gals,addedGalaxy)      
        f.close()
    return (nTrees,nHalos,nTreeHalos,gals)

In [5]:
def read_snap(folder,file_prefix,firstfile,lastfile,props,template):
    """ Reads L-Galaxy output files.
    Returns: (nTrees,nHalos,nTreeHalos,gals)
    Inputs: (folder,file_prefix,firstfile,lastfile,props,template)
    props - list of properties to return
    template - structure dtype definition for database """
    filter_list = []
    for prop in props:
        if props[prop]:
            filter_list.append((prop,template[prop]))
    filter_dtype = np.dtype(filter_list)
    # First loop to determine how many galaxies there are:
    nTrees = 0
    nHalos = 0
    for ifile in range(firstfile,lastfile+1):
        filename = folder+'/'+file_prefix+"_"+"%d"%(ifile)
        f = open(filename,"rb")
        this_nTrees =  np.fromfile(f,np.int32,1)[0]
        nTrees += this_nTrees
        this_nHalos = np.fromfile(f,np.int32,1)[0]
        nHalos += this_nHalos
        f.close()
    # Allocate arrays
    print("Total nGals = ",nHalos)
    nTreeHalos = np.empty(nTrees,dtype=np.int32)
    gals = np.empty(nHalos,dtype=filter_dtype)
    # Second loop to populate arrays
    nTrees = 0
    nHalos = 0
    for ifile in range(firstfile,lastfile+1):
        filename = folder+'/'+file_prefix+"_"+"%d"%(ifile)
        f = open(filename,"rb")
        this_nTrees =  np.fromfile(f,np.int32,1)[0]
        this_nHalos = np.fromfile(f,np.int32,1)[0]
        print("File ", ifile," nGals = ",this_nHalos)
        addednTreeHalos = np.fromfile(f,np.int32,this_nTrees)
        nTreeHalos[nTrees:nTrees+this_nTrees]=addednTreeHalos
        this_addedGalaxy = np.fromfile(f,template,this_nHalos) # all properties
        addedGalaxy = np.empty(this_nHalos,dtype=filter_dtype) # selected props
        for prop in template.names:
            if props[prop]:
#                try:
                addedGalaxy[prop] = this_addedGalaxy[prop]
#                except:
#                    embed()
        gals[nHalos:nHalos+this_nHalos] = addedGalaxy
        nTrees += this_nTrees
        nHalos += this_nHalos
        f.close()
    return (nTrees,nHalos,nTreeHalos,gals)

In [6]:
snaplist_file = '../MRPlancksnaplist.txt'

In [7]:
snapshot=15
file_prefix = "SA_z6.97"
output_file = "lgal_z7.pkl"

snapshot=58
file_prefix = "SA_z0.00"
output_file = "lgal_z0.pkl"


In [8]:
# Define which files you want to read in
firstfile = 5
lastfile = 7 #511

# Define what properties you want to read in
props = properties_used

props['Type'] = True
props['ColdGas'] = True
props['StellarMass'] = True
props['BulgeMass'] = True
props['DiskMass'] = True
props['HotGas'] = True
props['ICM'] = True
props['MetalsColdGas'] = True
props['MetalsBulgeMass'] = True
props['MetalsDiskMass'] = True
props['MetalsHotGas'] = True
props['MetalsEjectedMass'] = True
props['MetalsICM'] = True
props['Sfr'] = True
props['SfrBulge'] = True
props['DiskMass_elements'] = True
props['BulgeMass_elements'] = True
props['ColdGas_elements'] = True
props['HotGas_elements'] = True
#props['DustMassISM'] = True
props['DustRatesISM'] = True
props['Dust_elements'] = True
props['Attenuation_Dust'] = True
props['Mag'] = True
props['MagDust'] = True
props['GasDiskRadius'] = True

In [9]:
columns = []
columns_dt = []
for i in props.keys():
    if props[i]==True:
        columns.append(i)
        columns_dt.append(struct_dtype[i])
        #print(i,struct_dtype[i])
columns_dt
#columns

[dtype('int32'),
 dtype('float32'),
 dtype('float32'),
 dtype('float32'),
 dtype('float32'),
 dtype('float32'),
 dtype('float32'),
 dtype(('<f4', (3,))),
 dtype(('<f4', (3,))),
 dtype(('<f4', (3,))),
 dtype(('<f4', (3,))),
 dtype(('<f4', (3,))),
 dtype(('<f4', (3,))),
 dtype('float32'),
 dtype('float32'),
 dtype('float32'),
 dtype(('<f4', (2,))),
 dtype(('<f4', (2,))),
 dtype(('<f4', (11,))),
 dtype(('<f4', (11,))),
 dtype(('<f4', (11,))),
 dtype(('<f4', (11,))),
 dtype(('<f4', (5,))),
 dtype(('<f4', (11,))),
 dtype('float32')]

In [10]:
(nTrees,nHalos,nTreeHalos,gals) = \
    read_snap(datadir,file_prefix,firstfile,lastfile,\
                            props,struct_dtype)

Total nGals =  151971
File  5  nGals =  45631
File  6  nGals =  56497
File  7  nGals =  49843


In [11]:
np.unique(gals['Type'])

array([0, 1, 2], dtype=int32)

In [12]:
import pandas as pd
df = pd.DataFrame()

In [13]:
df.head()

In [14]:
df['Type'] = list(gals['Type'])

In [15]:
df.head()

Unnamed: 0,Type
0,0
1,1
2,2
3,0
4,1


In [16]:
for keyword in columns:
    df[keyword]=list(gals[keyword])

In [17]:
df['HotGas_elements'][0]

array([  1.05860371e+09,   3.53765856e+08,   6.77825547e+04,
         1.70495586e+04,   2.47896938e+05,   2.80671543e+04,
         2.03164902e+04,   2.05733340e+04,   1.06337070e+04,
         1.39190381e+03,   2.43493320e+04], dtype=float32)

In [18]:
df.head()

Unnamed: 0,Type,ColdGas,StellarMass,BulgeMass,DiskMass,HotGas,ICM,MetalsColdGas,MetalsBulgeMass,MetalsDiskMass,...,GasDiskRadius,MagDust,Mag,DiskMass_elements,BulgeMass_elements,ColdGas_elements,HotGas_elements,DustRatesISM,Dust_elements,Attenuation_Dust
0,0,0.160346,0.012535,0.0,0.012535,0.095083,0.0,"[1.96429e-05, 0.000284727, 7.23087e-05]","[0.0, 0.0, 0.0]","[2.23552e-06, 2.93506e-05, -2.65425e-06]",...,0.012735,"[-15.8381, -15.7432]","[-15.8506, -15.752]","[1.38743e+08, 4.70601e+07, 58867.9, 14650.9, 2...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[1.77368e+09, 6.03056e+08, 334294.0, 230783.0,...","[1.0586e+09, 3.53766e+08, 67782.6, 17049.6, 24...","[0.000660955, 0.00038733, 1.12233e-05, 0.02891...","[0.0, 0.0, 628166.0, 0.0, 1.93924e+06, 0.0, 30...",0.008495
1,1,0.084843,0.177387,0.015681,0.161706,0.042501,0.0,"[4.73383e-05, 0.000772251, 0.000175075]","[6.23553e-06, 9.1531e-05, -2.18173e-05]","[0.000103298, 0.00128292, -0.000170072]",...,0.004789,"[-15.744, -15.9118]","[-16.2803, -16.3172]","[1.76023e+09, 6.22653e+08, 3.81778e+06, 645672...","[1.72302e+08, 5.94715e+07, 159154.0, 39175.6, ...","[9.09552e+08, 3.34423e+08, 778810.0, 521254.0,...","[4.63851e+08, 1.63112e+08, 947753.0, 146212.0,...","[0.00231219, 0.00342904, 2.54335e-05, 0.260108...","[0.0, 0.0, 1.32906e+06, 0.0, 3.3527e+06, 0.0, ...",0.087338
2,2,0.02433,0.032784,0.0,0.032784,0.0,0.0,"[9.67711e-06, 0.000151559, 3.9816e-05]","[0.0, 0.0, 0.0]","[1.42818e-05, 0.000176919, -1.85755e-05]",...,0.004394,"[-7.36062, -11.5814]","[-7.43458, -11.6589]","[3.59415e+08, 1.24939e+08, 617494.0, 91298.3, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[2.6394e+08, 9.42257e+07, 39550.8, 95272.5, 24...","[0.0, 0.0, 41095.0, 0.0, 89448.2, 0.0, 368.62,...","[6.03672e-05, 0.0, 5.17993e-08, 0.00363889, 0.0]","[0.0, 0.0, 416840.0, 0.0, 883194.0, 0.0, 4986....",0.036991
3,0,0.10585,1.332626,0.101917,1.230709,12.185545,0.000915,"[9.66682e-05, 0.00149067, 0.000707254]","[5.83635e-05, 0.000749799, -0.000119849]","[0.00118162, 0.0152198, -0.00278058]",...,0.015153,"[-7.92532, -14.7966]","[-8.05891, -14.9874]","[1.3239e+10, 4.81683e+09, 6.09379e+07, 6.85901...","[1.11258e+09, 3.90625e+08, 2.17448e+06, 361536...","[1.0883e+09, 4.45592e+08, 117216.0, 1.38344e+0...","[1.31801e+11, 4.74129e+10, 2.96153e+08, 6.0895...","[0.00407006, 0.0, 1.19025e-06, 0.0109498, 0.0]","[0.0, 0.0, 6.43265e+06, 0.0, 1.22162e+07, 0.0,...",0.049415
4,1,0.037885,0.011302,0.0,0.011302,0.002483,0.0,"[1.16095e-05, 0.000163645, 5.54287e-05]","[0.0, 0.0, 0.0]","[2.64387e-06, 2.96275e-05, -3.34442e-06]",...,0.005215,"[-12.3271, -12.5334]","[-12.4076, -12.5867]","[1.24951e+08, 4.2529e+07, 82991.5, 17914.2, 29...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[4.13762e+08, 1.45555e+08, 49222.2, 140586.0, ...","[2.75308e+07, 9.30426e+06, 14470.4, 2507.68, 4...","[4.69113e-05, 6.00094e-05, 6.96284e-07, 0.0056...","[0.0, 0.0, 696392.0, 0.0, 1.60724e+06, 0.0, 17...",0.045696


In [19]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 151971 entries, 0 to 151970
Data columns (total 25 columns):
Type                  151971 non-null int64
ColdGas               151971 non-null float64
StellarMass           151971 non-null float64
BulgeMass             151971 non-null float64
DiskMass              151971 non-null float64
HotGas                151971 non-null float64
ICM                   151971 non-null float64
MetalsColdGas         151971 non-null object
MetalsBulgeMass       151971 non-null object
MetalsDiskMass        151971 non-null object
MetalsHotGas          151971 non-null object
MetalsEjectedMass     151971 non-null object
MetalsICM             151971 non-null object
Sfr                   151971 non-null float64
SfrBulge              151971 non-null float64
GasDiskRadius         151971 non-null float64
MagDust               151971 non-null object
Mag                   151971 non-null object
DiskMass_elements     151971 non-null object
BulgeMass_elements    1519

In [20]:
df2 = df[df['Type']==2]

In [21]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 36729 entries, 2 to 151882
Data columns (total 25 columns):
Type                  36729 non-null int64
ColdGas               36729 non-null float64
StellarMass           36729 non-null float64
BulgeMass             36729 non-null float64
DiskMass              36729 non-null float64
HotGas                36729 non-null float64
ICM                   36729 non-null float64
MetalsColdGas         36729 non-null object
MetalsBulgeMass       36729 non-null object
MetalsDiskMass        36729 non-null object
MetalsHotGas          36729 non-null object
MetalsEjectedMass     36729 non-null object
MetalsICM             36729 non-null object
Sfr                   36729 non-null float64
SfrBulge              36729 non-null float64
GasDiskRadius         36729 non-null float64
MagDust               36729 non-null object
Mag                   36729 non-null object
DiskMass_elements     36729 non-null object
BulgeMass_elements    36729 non-null object
Co

In [22]:
df['MetalsColdGas']

0         [1.96429e-05, 0.000284727, 7.23087e-05]
1         [4.73383e-05, 0.000772251, 0.000175075]
2          [9.67711e-06, 0.000151559, 3.9816e-05]
3          [9.66682e-05, 0.00149067, 0.000707254]
4         [1.16095e-05, 0.000163645, 5.54287e-05]
5         [6.81925e-06, 8.95839e-05, 4.09451e-05]
6         [1.40318e-05, 0.000202624, 8.00861e-05]
7           [0.000150553, 0.00226373, 0.00105649]
8         [2.45207e-05, 0.000354895, 9.97292e-05]
9                                 [0.0, 0.0, 0.0]
10        [5.90486e-05, 0.000967743, 0.000197644]
11          [0.000515861, 0.00825807, 0.00174992]
12        [1.01191e-05, 0.000142339, 4.78842e-05]
13         [0.000244195, 0.00413358, 0.000720223]
14        [1.35719e-05, 0.000187687, 4.79636e-05]
15         [8.13591e-05, 0.00136613, 0.000250423]
16         [4.57375e-05, 0.000761247, 0.00013786]
17        [2.96803e-06, 3.19927e-05, 3.06561e-05]
18          [0.000142108, 0.00234859, 0.00043779]
19        [9.66548e-06, 0.000150584, 4.53072e-05]


In [23]:
#df['A'], df['B'] = df['MetalsColdGas'].split(' ', 1)

In [24]:
tags = df['MetalsColdGas'].apply(pd.Series)

In [25]:
tags = tags.rename(columns = lambda x : 'listcol_' + str(x))

In [26]:
tags.head()

Unnamed: 0,listcol_0,listcol_1,listcol_2
0,2e-05,0.000285,7.2e-05
1,4.7e-05,0.000772,0.000175
2,1e-05,0.000152,4e-05
3,9.7e-05,0.001491,0.000707
4,1.2e-05,0.000164,5.5e-05


In [27]:
df = pd.concat([df[:], tags[:]], axis=1)

In [28]:
df['MetalsColdGas']

0         [1.96429e-05, 0.000284727, 7.23087e-05]
1         [4.73383e-05, 0.000772251, 0.000175075]
2          [9.67711e-06, 0.000151559, 3.9816e-05]
3          [9.66682e-05, 0.00149067, 0.000707254]
4         [1.16095e-05, 0.000163645, 5.54287e-05]
5         [6.81925e-06, 8.95839e-05, 4.09451e-05]
6         [1.40318e-05, 0.000202624, 8.00861e-05]
7           [0.000150553, 0.00226373, 0.00105649]
8         [2.45207e-05, 0.000354895, 9.97292e-05]
9                                 [0.0, 0.0, 0.0]
10        [5.90486e-05, 0.000967743, 0.000197644]
11          [0.000515861, 0.00825807, 0.00174992]
12        [1.01191e-05, 0.000142339, 4.78842e-05]
13         [0.000244195, 0.00413358, 0.000720223]
14        [1.35719e-05, 0.000187687, 4.79636e-05]
15         [8.13591e-05, 0.00136613, 0.000250423]
16         [4.57375e-05, 0.000761247, 0.00013786]
17        [2.96803e-06, 3.19927e-05, 3.06561e-05]
18          [0.000142108, 0.00234859, 0.00043779]
19        [9.66548e-06, 0.000150584, 4.53072e-05]


In [29]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 151971 entries, 0 to 151970
Data columns (total 28 columns):
Type                  151971 non-null int64
ColdGas               151971 non-null float64
StellarMass           151971 non-null float64
BulgeMass             151971 non-null float64
DiskMass              151971 non-null float64
HotGas                151971 non-null float64
ICM                   151971 non-null float64
MetalsColdGas         151971 non-null object
MetalsBulgeMass       151971 non-null object
MetalsDiskMass        151971 non-null object
MetalsHotGas          151971 non-null object
MetalsEjectedMass     151971 non-null object
MetalsICM             151971 non-null object
Sfr                   151971 non-null float64
SfrBulge              151971 non-null float64
GasDiskRadius         151971 non-null float64
MagDust               151971 non-null object
Mag                   151971 non-null object
DiskMass_elements     151971 non-null object
BulgeMass_elements    1519

In [30]:
df[df['StellarMass']*1.0E10/0.673>1E9].info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 41916 entries, 1 to 148312
Data columns (total 28 columns):
Type                  41916 non-null int64
ColdGas               41916 non-null float64
StellarMass           41916 non-null float64
BulgeMass             41916 non-null float64
DiskMass              41916 non-null float64
HotGas                41916 non-null float64
ICM                   41916 non-null float64
MetalsColdGas         41916 non-null object
MetalsBulgeMass       41916 non-null object
MetalsDiskMass        41916 non-null object
MetalsHotGas          41916 non-null object
MetalsEjectedMass     41916 non-null object
MetalsICM             41916 non-null object
Sfr                   41916 non-null float64
SfrBulge              41916 non-null float64
GasDiskRadius         41916 non-null float64
MagDust               41916 non-null object
Mag                   41916 non-null object
DiskMass_elements     41916 non-null object
BulgeMass_elements    41916 non-null object
Co

In [31]:
gals['Mag']

array([[-15.85059166, -15.7519989 ],
       [-16.28028679, -16.31723976],
       [ -7.4345808 , -11.65894413],
       ..., 
       [-14.26182842, -14.27921486],
       [ 99.        ,  99.        ],
       [-15.83415127, -15.74551201]], dtype=float32)

In [32]:
#gals2 = pd.DataFrame()

In [33]:
gals2 = pd.DataFrame()
for column in columns:
    gals2[column]=list(gals[column])

In [34]:
#gals2[gals2.info()==object]

In [35]:
gals3 = gals2.loc[:, gals2.dtypes == object]

In [36]:
gals3.head()

Unnamed: 0,MetalsColdGas,MetalsBulgeMass,MetalsDiskMass,MetalsHotGas,MetalsEjectedMass,MetalsICM,MagDust,Mag,DiskMass_elements,BulgeMass_elements,ColdGas_elements,HotGas_elements,DustRatesISM,Dust_elements
0,"[1.96429e-05, 0.000284727, 7.23087e-05]","[0.0, 0.0, 0.0]","[2.23552e-06, 2.93506e-05, -2.65425e-06]","[1.5381e-06, 2.3142e-05, 4.92734e-06]","[8.90892e-06, 0.000141201, 1.97739e-05]","[0.0, 0.0, 0.0]","[-15.8381, -15.7432]","[-15.8506, -15.752]","[1.38743e+08, 4.70601e+07, 58867.9, 14650.9, 2...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[1.77368e+09, 6.03056e+08, 334294.0, 230783.0,...","[1.0586e+09, 3.53766e+08, 67782.6, 17049.6, 24...","[0.000660955, 0.00038733, 1.12233e-05, 0.02891...","[0.0, 0.0, 628166.0, 0.0, 1.93924e+06, 0.0, 30..."
1,"[4.73383e-05, 0.000772251, 0.000175075]","[6.23553e-06, 9.1531e-05, -2.18173e-05]","[0.000103298, 0.00128292, -0.000170072]","[1.33e-05, 0.000213187, 4.85793e-05]","[0.000241423, 0.00384298, 0.000875618]","[0.0, 0.0, 0.0]","[-15.744, -15.9118]","[-16.2803, -16.3172]","[1.76023e+09, 6.22653e+08, 3.81778e+06, 645672...","[1.72302e+08, 5.94715e+07, 159154.0, 39175.6, ...","[9.09552e+08, 3.34423e+08, 778810.0, 521254.0,...","[4.63851e+08, 1.63112e+08, 947753.0, 146212.0,...","[0.00231219, 0.00342904, 2.54335e-05, 0.260108...","[0.0, 0.0, 1.32906e+06, 0.0, 3.3527e+06, 0.0, ..."
2,"[9.67711e-06, 0.000151559, 3.9816e-05]","[0.0, 0.0, 0.0]","[1.42818e-05, 0.000176919, -1.85755e-05]","[0.0, 0.0, 0.0]","[0.0, 0.0, 0.0]","[0.0, 0.0, 0.0]","[-7.36062, -11.5814]","[-7.43458, -11.6589]","[3.59415e+08, 1.24939e+08, 617494.0, 91298.3, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[2.6394e+08, 9.42257e+07, 39550.8, 95272.5, 24...","[0.0, 0.0, 41095.0, 0.0, 89448.2, 0.0, 368.62,...","[6.03672e-05, 0.0, 5.17993e-08, 0.00363889, 0.0]","[0.0, 0.0, 416840.0, 0.0, 883194.0, 0.0, 4986...."
3,"[9.66682e-05, 0.00149067, 0.000707254]","[5.83635e-05, 0.000749799, -0.000119849]","[0.00118162, 0.0152198, -0.00278058]","[0.00520906, 0.0860681, 0.0175953]","[1.92354e-05, 0.00031667, 6.60781e-05]","[9.62207e-09, 1.08406e-07, -1.61198e-06]","[-7.92532, -14.7966]","[-8.05891, -14.9874]","[1.3239e+10, 4.81683e+09, 6.09379e+07, 6.85901...","[1.11258e+09, 3.90625e+08, 2.17448e+06, 361536...","[1.0883e+09, 4.45592e+08, 117216.0, 1.38344e+0...","[1.31801e+11, 4.74129e+10, 2.96153e+08, 6.0895...","[0.00407006, 0.0, 1.19025e-06, 0.0109498, 0.0]","[0.0, 0.0, 6.43265e+06, 0.0, 1.22162e+07, 0.0,..."
4,"[1.16095e-05, 0.000163645, 5.54287e-05]","[0.0, 0.0, 0.0]","[2.64387e-06, 2.96275e-05, -3.34442e-06]","[2.11155e-07, 3.00135e-06, 8.00668e-07]","[2.00683e-05, 0.000286261, 6.80333e-05]","[0.0, 0.0, 0.0]","[-12.3271, -12.5334]","[-12.4076, -12.5867]","[1.24951e+08, 4.2529e+07, 82991.5, 17914.2, 29...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[4.13762e+08, 1.45555e+08, 49222.2, 140586.0, ...","[2.75308e+07, 9.30426e+06, 14470.4, 2507.68, 4...","[4.69113e-05, 6.00094e-05, 6.96284e-07, 0.0056...","[0.0, 0.0, 696392.0, 0.0, 1.60724e+06, 0.0, 17..."


In [37]:
for i in gals2.loc[:, gals2.dtypes == object].columns:
    print(i)

MetalsColdGas
MetalsBulgeMass
MetalsDiskMass
MetalsHotGas
MetalsEjectedMass
MetalsICM
MagDust
Mag
DiskMass_elements
BulgeMass_elements
ColdGas_elements
HotGas_elements
DustRatesISM
Dust_elements


In [38]:
new_names = {
    "MetalsColdGas":['MetalsColdGas.AGB','MetalsColdGas.SNII','MetalsColdGas.SNIA'],
    "MetalsBulgeMass":['MetalsBulgeMass.AGB','MetalsBulgeMass.SNII','MetalsBulgeMass.SNIA'],
    "MetalsDiskMass":['MetalsDiskMass.AGB','MetalsDiskMass.SNII','MetalsDiskMass.SNIA'],
    "MetalsHotGas":['MetalsHotGas.AGB','MetalsHotGas.SNII','MetalsHotGas.SNIA'],
    "MetalsEjectedMass":['MetalsEjectedMass.AGB','MetalsEjectedMass.SNII','MetalsEjectedMass.SNIA'],
    "MetalsICM":['MetalsICM.AGB','MetalsICM.SNII','MetalsICM.SNIA'],
    "MagDust":['MagDust.FUV','MagDust.NUV'],
    "Mag":['Mag.FUV','Mag.NUV'],
    "DiskMass_elements":['DiskMass.H','DiskMass.He','DiskMass.C','DiskMass.N','DiskMass.O','DiskMass.Ne','DiskMass.Mg','DiskMass.Si','DiskMass.S','DiskMass.Ca','DiskMass.Fe'],
    "BulgeMass_elements":['BulgeMass.H','BulgeMass.He','BulgeMass.C','BulgeMass.N','BulgeMass.O','BulgeMass.Ne','BulgeMass.Mg','BulgeMass.Si','BulgeMass.S','BulgeMass.Ca','BulgeMass.Fe'],
    "ColdGas_elements":['ColdGas.H','ColdGas.He','ColdGas.C','ColdGas.N','ColdGas.O','ColdGas.Ne','ColdGas.Mg','ColdGas.Si','ColdGas.S','ColdGas.Ca','ColdGas.Fe'],
    "HotGas_elements":['HotGas.H','HotGas.He','HotGas.C','HotGas.N','HotGas.O','HotGas.Ne','HotGas.Mg','HotGas.Si','HotGas.S','HotGas.Ca','HotGas.Fe'],
    "DustRatesISM":['DustRate.AGB','DustRate.SNII','DustRate.SNIA','DustRate.GROW','DustRate.DEST'],
    "Dust_elements":['Dust.H','Dust.He','Dust.C','Dust.N','Dust.O','Dust.Ne','Dust.Mg','Dust.Si','Dust.S','Dust.Ca','Dust.Fe']
}

In [39]:
new_names['Mag']

['Mag.FUV', 'Mag.NUV']

In [40]:
for i in gals2.loc[:, gals2.dtypes == object].columns:
    print(new_names[i])

['MetalsColdGas.AGB', 'MetalsColdGas.SNII', 'MetalsColdGas.SNIA']
['MetalsBulgeMass.AGB', 'MetalsBulgeMass.SNII', 'MetalsBulgeMass.SNIA']
['MetalsDiskMass.AGB', 'MetalsDiskMass.SNII', 'MetalsDiskMass.SNIA']
['MetalsHotGas.AGB', 'MetalsHotGas.SNII', 'MetalsHotGas.SNIA']
['MetalsEjectedMass.AGB', 'MetalsEjectedMass.SNII', 'MetalsEjectedMass.SNIA']
['MetalsICM.AGB', 'MetalsICM.SNII', 'MetalsICM.SNIA']
['MagDust.FUV', 'MagDust.NUV']
['Mag.FUV', 'Mag.NUV']
['DiskMass.H', 'DiskMass.He', 'DiskMass.C', 'DiskMass.N', 'DiskMass.O', 'DiskMass.Ne', 'DiskMass.Mg', 'DiskMass.Si', 'DiskMass.S', 'DiskMass.Ca', 'DiskMass.Fe']
['BulgeMass.H', 'BulgeMass.He', 'BulgeMass.C', 'BulgeMass.N', 'BulgeMass.O', 'BulgeMass.Ne', 'BulgeMass.Mg', 'BulgeMass.Si', 'BulgeMass.S', 'BulgeMass.Ca', 'BulgeMass.Fe']
['ColdGas.H', 'ColdGas.He', 'ColdGas.C', 'ColdGas.N', 'ColdGas.O', 'ColdGas.Ne', 'ColdGas.Mg', 'ColdGas.Si', 'ColdGas.S', 'ColdGas.Ca', 'ColdGas.Fe']
['HotGas.H', 'HotGas.He', 'HotGas.C', 'HotGas.N', 'HotGas.O',

In [41]:
#tags = df['MetalsColdGas'].apply(pd.Series)
#tags = tags.rename(columns = lambda x : 'listcol_' + str(x))
#df = pd.concat([df[:], tags[:]], axis=1)
import time
#gals3= pd.DataFrame()
gals3=gals2
start = time.time()
print("Start the clock...!")


for i in gals3.loc[:, gals3.dtypes == object].columns:
    inside1= time.time()
    tags = gals3[i].apply(pd.Series)
    inside2= time.time()
    print(str(inside2 - inside1) + " sec")
    tags = tags.rename(columns = lambda x : new_names[i][x])
    inside3= time.time()
    print(str(inside3 - inside2) + " sec")
    gals3 = pd.concat([gals3[:], tags[:]], axis=1)
    inside4= time.time()
    print(str(inside4 - inside3) + " sec")
    gals3 = gals3.drop(i, axis=1)
    inside5= time.time()
    print(str(inside5 - inside4) + " sec")
    print("Finished "+i)

end = time.time()
print(str((end - start)/60) + " mins")

Start the clock...!
29.444571256637573 sec
0.003129720687866211 sec
0.049651145935058594 sec
0.9451580047607422 sec
Finished MetalsColdGas
27.819793224334717 sec
0.002003908157348633 sec
0.07666516304016113 sec
0.18781089782714844 sec
Finished MetalsBulgeMass
28.651572942733765 sec
0.002290010452270508 sec
0.0740048885345459 sec
0.1785290241241455 sec
Finished MetalsDiskMass
29.128042936325073 sec
0.0019690990447998047 sec
0.07709693908691406 sec
0.1680138111114502 sec
Finished MetalsHotGas
28.85132098197937 sec
0.0021648406982421875 sec
0.0587921142578125 sec
0.13933706283569336 sec
Finished MetalsEjectedMass
31.154160737991333 sec
0.0018622875213623047 sec
0.05864405632019043 sec
0.141463041305542 sec
Finished MetalsICM
31.0316960811615 sec
0.0013208389282226562 sec
0.05074596405029297 sec
0.07832813262939453 sec
Finished MagDust
30.99790596961975 sec
0.0017158985137939453 sec
0.051186323165893555 sec
0.07227706909179688 sec
Finished Mag
29.986217975616455 sec
0.009392023086547852 se

In [42]:
gals3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 151971 entries, 0 to 151970
Data columns (total 93 columns):
Type                      151971 non-null int64
ColdGas                   151971 non-null float64
StellarMass               151971 non-null float64
BulgeMass                 151971 non-null float64
DiskMass                  151971 non-null float64
HotGas                    151971 non-null float64
ICM                       151971 non-null float64
Sfr                       151971 non-null float64
SfrBulge                  151971 non-null float64
GasDiskRadius             151971 non-null float64
Attenuation_Dust          151971 non-null float64
MetalsColdGas.AGB         151971 non-null float32
MetalsColdGas.SNII        151971 non-null float32
MetalsColdGas.SNIA        151971 non-null float32
MetalsBulgeMass.AGB       151971 non-null float32
MetalsBulgeMass.SNII      151971 non-null float32
MetalsBulgeMass.SNIA      151971 non-null float32
MetalsDiskMass.AGB        151971 non-null

In [43]:
gals3 = gals3.drop('Dust.H', axis=1)

In [44]:
gals3.head()

Unnamed: 0,Type,ColdGas,StellarMass,BulgeMass,DiskMass,HotGas,ICM,Sfr,SfrBulge,GasDiskRadius,...,Dust.He,Dust.C,Dust.N,Dust.O,Dust.Ne,Dust.Mg,Dust.Si,Dust.S,Dust.Ca,Dust.Fe
0,0,0.160346,0.012535,0.0,0.012535,0.095083,0.0,0.070707,0.0,0.012735,...,0.0,628166.25,0.0,1939243.0,0.0,305.451324,1035.151,0.0,0.0,156684.9
1,1,0.084843,0.177387,0.015681,0.161706,0.042501,0.0,0.154248,0.0,0.004789,...,0.0,1329056.75,0.0,3352699.0,0.0,6425.539551,97680.73,0.0,0.0,198600.9
2,2,0.02433,0.032784,0.0,0.032784,0.0,0.0,0.0,0.0,0.004394,...,0.0,416839.75,0.0,883194.2,0.0,4986.804199,64618.93,0.0,0.0,87878.76
3,0,0.10585,1.332626,0.101917,1.230709,12.185545,0.000915,0.0,0.0,0.015153,...,0.0,6432646.0,0.0,12216200.0,0.0,825207.875,1584776.0,0.0,0.0,1496692.0
4,1,0.037885,0.011302,0.0,0.011302,0.002483,0.0,0.004147,0.0,0.005215,...,0.0,696391.5,0.0,1607243.0,0.0,17340.447266,59712.22,0.0,0.0,141712.5


In [45]:
#df2[['team1','team2']] = pd.DataFrame([x for x in df2.teams])
gals2.MetalsColdGas.head()

0    [1.96429e-05, 0.000284727, 7.23087e-05]
1    [4.73383e-05, 0.000772251, 0.000175075]
2     [9.67711e-06, 0.000151559, 3.9816e-05]
3     [9.66682e-05, 0.00149067, 0.000707254]
4    [1.16095e-05, 0.000163645, 5.54287e-05]
Name: MetalsColdGas, dtype: object

In [46]:
testdf = pd.DataFrame()
testdf = gals2['Type']
testdf.head()

0    0
1    1
2    2
3    0
4    1
Name: Type, dtype: int64

In [47]:
#testdf[['AGB'],['SNe'],['Sct']] = pd.DataFrame([x for x in gals2.MetalsColdGas])

In [48]:
#testdf[['AGB'],['SNe'],['Sct']] = pd.DataFrame(gals2.MetalsColdGas.values.tolist())
testdf2 = pd.DataFrame(gals2['MetalsColdGas'].values.tolist(),columns=['AGB','SNe','Scott'])
#testdf['AGB','SNe','Scott'] = pd.DataFrame(gals2['MetalsColdGas'].values.tolist())

In [49]:
testdf2.head()

Unnamed: 0,AGB,SNe,Scott
0,2e-05,0.000285,7.2e-05
1,4.7e-05,0.000772,0.000175
2,1e-05,0.000152,4e-05
3,9.7e-05,0.001491,0.000707
4,1.2e-05,0.000164,5.5e-05


In [50]:
newnew3 = pd.concat([testdf, testdf2],axis=1)

In [51]:
newnew3

Unnamed: 0,Type,AGB,SNe,Scott
0,0,1.964290e-05,0.000285,7.230874e-05
1,1,4.733827e-05,0.000772,1.750745e-04
2,2,9.677114e-06,0.000152,3.981599e-05
3,0,9.666817e-05,0.001491,7.072539e-04
4,1,1.160948e-05,0.000164,5.542869e-05
5,2,6.819254e-06,0.000090,4.094508e-05
6,2,1.403179e-05,0.000203,8.008605e-05
7,0,1.505533e-04,0.002264,1.056489e-03
8,1,2.452068e-05,0.000355,9.972918e-05
9,0,0.000000e+00,0.000000,0.000000e+00


In [52]:
#tags = df['MetalsColdGas'].apply(pd.Series)
#tags = tags.rename(columns = lambda x : 'listcol_' + str(x))
#df = pd.concat([df[:], tags[:]], axis=1)
(nTrees,nHalos,nTreeHalos,gals) = \
    read_snap(datadir,file_prefix,5,7,\
                            props,struct_dtype)
    
gals2 = pd.DataFrame()
for column in columns:
    gals2[column]=list(gals[column])

#testdf = pd.DataFrame()
#testdf = gals2['Type']
#testdf2 = pd.DataFrame(gals2['MetalsColdGas'].values.tolist(),columns=['AGB','SNe','Scott'])
#newnew3 = pd.concat([testdf, testdf2],axis=1)

import time
start = time.time()

gals3=gals2
for i in gals3.loc[:, gals3.dtypes == object].columns:
    temp_df = pd.DataFrame()
    temp_df = pd.DataFrame(gals3[i].values.tolist(),columns= new_names[i])
    gals3 = pd.concat([gals3,temp_df],axis=1)
    gals3 = gals3.drop(i, axis=1)
    print("Finished "+i)

end = time.time()
print(str((end - start)) + " secs")

#import time
#gals3= pd.DataFrame()
#gals3=gals2
#start = time.time()
#print("Start the clock...!")


#for i in gals3.loc[:, gals3.dtypes == object].columns:
#    inside1= time.time()
##    tags = gals3[i].apply(pd.Series)
#    inside2= time.time()
#    print(str(inside2 - inside1) + " sec")
#    tags = tags.rename(columns = lambda x : new_names[i][x])
#    inside3= time.time()
#    print(str(inside3 - inside2) + " sec")
#    gals3 = pd.concat([gals3[:], tags[:]], axis=1)
#    inside4= time.time()
#    print(str(inside4 - inside3) + " sec")
#    gals3 = gals3.drop(i, axis=1)
#    inside5= time.time()
#    print(str(inside5 - inside4) + " sec")
#    print("Finished "+i)
#
#end = time.time()
#print(str((end - start)/60) + " mins")

Total nGals =  151971
File  5  nGals =  45631
File  6  nGals =  56497
File  7  nGals =  49843
Finished MetalsColdGas
Finished MetalsBulgeMass
Finished MetalsDiskMass
Finished MetalsHotGas
Finished MetalsEjectedMass
Finished MetalsICM
Finished MagDust
Finished Mag
Finished DiskMass_elements
Finished BulgeMass_elements
Finished ColdGas_elements
Finished HotGas_elements
Finished DustRatesISM
Finished Dust_elements
10.468410730361938 secs


In [53]:
gals3.columns

Index(['Type', 'ColdGas', 'StellarMass', 'BulgeMass', 'DiskMass', 'HotGas',
       'ICM', 'Sfr', 'SfrBulge', 'GasDiskRadius', 'Attenuation_Dust',
       'MetalsColdGas.AGB', 'MetalsColdGas.SNII', 'MetalsColdGas.SNIA',
       'MetalsBulgeMass.AGB', 'MetalsBulgeMass.SNII', 'MetalsBulgeMass.SNIA',
       'MetalsDiskMass.AGB', 'MetalsDiskMass.SNII', 'MetalsDiskMass.SNIA',
       'MetalsHotGas.AGB', 'MetalsHotGas.SNII', 'MetalsHotGas.SNIA',
       'MetalsEjectedMass.AGB', 'MetalsEjectedMass.SNII',
       'MetalsEjectedMass.SNIA', 'MetalsICM.AGB', 'MetalsICM.SNII',
       'MetalsICM.SNIA', 'MagDust.FUV', 'MagDust.NUV', 'Mag.FUV', 'Mag.NUV',
       'DiskMass.H', 'DiskMass.He', 'DiskMass.C', 'DiskMass.N', 'DiskMass.O',
       'DiskMass.Ne', 'DiskMass.Mg', 'DiskMass.Si', 'DiskMass.S',
       'DiskMass.Ca', 'DiskMass.Fe', 'BulgeMass.H', 'BulgeMass.He',
       'BulgeMass.C', 'BulgeMass.N', 'BulgeMass.O', 'BulgeMass.Ne',
       'BulgeMass.Mg', 'BulgeMass.Si', 'BulgeMass.S', 'BulgeMass.Ca',
       

In [55]:
gals3.to_pickle('./save.pkl')

In [56]:
gals3.to_hdf('./save2.hdf',key='gal',format='t')

In [57]:
gals3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 151971 entries, 0 to 151970
Data columns (total 93 columns):
Type                      151971 non-null int64
ColdGas                   151971 non-null float64
StellarMass               151971 non-null float64
BulgeMass                 151971 non-null float64
DiskMass                  151971 non-null float64
HotGas                    151971 non-null float64
ICM                       151971 non-null float64
Sfr                       151971 non-null float64
SfrBulge                  151971 non-null float64
GasDiskRadius             151971 non-null float64
Attenuation_Dust          151971 non-null float64
MetalsColdGas.AGB         151971 non-null float64
MetalsColdGas.SNII        151971 non-null float64
MetalsColdGas.SNIA        151971 non-null float64
MetalsBulgeMass.AGB       151971 non-null float64
MetalsBulgeMass.SNII      151971 non-null float64
MetalsBulgeMass.SNIA      151971 non-null float64
MetalsDiskMass.AGB        151971 non-null

In [59]:
import h5py
h5save = h5py.File("mytestfile.hdf5", "w")

In [61]:
h5save.create_dataset('gals', data=gals)

<HDF5 dataset "gals": shape (151971,), type "|V372">

In [66]:
h5save.name

'/'

In [71]:
 b = h5save['gals'][:]

In [73]:
b['Type']

array([0, 1, 2, ..., 0, 0, 0], dtype=int32)

In [74]:
c = h5save['gals']['Type']

In [75]:
c

array([0, 1, 2, ..., 0, 0, 0], dtype=int32)

In [79]:
print(b.keys())

AttributeError: 'numpy.ndarray' object has no attribute 'keys'

In [80]:
h5save.close()

In [81]:
b

array([ (0,  0.16034566,  0.01253473,  0.      ,  0.01253473,  0.09508331,  0., [  1.96429010e-05,   2.84727372e-04,   7.23087433e-05], [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00], [  2.23552433e-06,   2.93505782e-05,  -2.65424751e-06], [  1.53810493e-06,   2.31420036e-05,   4.92734307e-06], [  8.90891715e-06,   1.41200610e-04,   1.97738882e-05], [ 0.,  0.,  0.],  0.07070739,  0.,  0.01273489, [-15.83811665, -15.74319744], [-15.85059166, -15.7519989 ], [  1.38743200e+08,   4.70600560e+07,   5.88678633e+04,   1.46508867e+04,   2.94605156e+05,   2.89798418e+04,   2.10873809e+04,   2.11382363e+04,   1.09457344e+04,   1.42501807e+03,   2.72156445e+04], [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,   0.00000000e+00,   0.00000000e+00,   0.00000000e+00,   0.00000000e+00,   0.00000000e+00,   0.00000000e+00,   0.00000000e+00,   0.00000000e+00], [  1.77367501e+09,   6.03056320e+08,   3.34294375e+05,   2.30782641e+05,   1.11044912e+06,   3.48812344e+05,   2.52056656e+05,   2.5