forked from tardis-sn/carsus
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
309 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
from carsus.model import Atom, AtomicWeight, DataSource | ||
from pandas import read_sql_query, HDFStore | ||
|
||
|
||
class Dataset(object): | ||
|
||
def __init__(self, name, data=None): | ||
self.name = name | ||
self._data = data | ||
|
||
@property | ||
def data(self): | ||
if self._data is None: | ||
raise ValueError('Data is not available please load first') | ||
else: | ||
return self._data | ||
|
||
@data.setter | ||
def data(self, value): | ||
self._data = value | ||
|
||
|
||
class AtomsDataset(Dataset): | ||
|
||
def __init__(self, name="atoms", data=None): | ||
super(AtomsDataset, self).__init__(name, data) | ||
|
||
def load_sql(self, session, load_atomic_weights=False): | ||
q = session.query(Atom.atomic_number.label("atomic_number"), | ||
Atom.symbol.label("symbol"), | ||
Atom.name.label("name"), | ||
Atom.group.label("group"), | ||
Atom.period.label("period"), | ||
) | ||
if load_atomic_weights: | ||
|
||
q = q.add_columns(AtomicWeight.value.label("atomic_weight_value"), | ||
AtomicWeight.std_dev.label("atomic_weight_uncert"), | ||
AtomicWeight.unit.label("atomic_weight_unit"), | ||
DataSource.short_name.label("atomic_weight_data_source")).\ | ||
join(Atom.quantities.of_type(AtomicWeight)).\ | ||
join(AtomicWeight.data_source) | ||
|
||
df = read_sql_query(q.selectable, session.bind, index_col="atomic_number") | ||
self.data = df | ||
|
||
|
||
def create_hdf(fname, datasets): | ||
""" | ||
Store datasets in a HDF file | ||
Parameters | ||
---------- | ||
fname : str | ||
The name of the HDF file | ||
datasets : list of Dataset objects | ||
The datasets that will be put in the HDF file | ||
""" | ||
with HDFStore(fname, "w") as store: | ||
for dataset in datasets: | ||
store[dataset.name] = dataset.data | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,244 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Initializing the database\n", | ||
"Ingesting basic atomic data\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"from carsus import init_db\n", | ||
"session = init_db(\"sqlite://\")\n", | ||
"session.commit()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Downloading the data from http://physics.nist.gov/cgi-bin/Compositions/stand_alone.pl\n", | ||
"Ingesting atomic weights\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"from carsus.io.nist import NISTWeightsCompIngester\n", | ||
"ingester = NISTWeightsCompIngester()\n", | ||
"ingester.download()\n", | ||
"ingester.ingest(session)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
" symbol name group period\n", | ||
"atomic_number \n", | ||
"1 H Hydrogen 1 1\n", | ||
"2 He Helium 18 1\n", | ||
"3 Li Lithium 1 2\n", | ||
"4 Be Beryllium 2 2\n", | ||
"5 B Boron 13 2\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"from carsus.io.output import AtomsDataset, Dataset\n", | ||
"\n", | ||
"# Create a dataset and load with the atoms data from the database. \n", | ||
"\n", | ||
"atoms = AtomsDataset()\n", | ||
"atoms.load_sql(session)\n", | ||
"print atoms.data.head(5)\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
" symbol name group period atomic_weight_value \\\n", | ||
"atomic_number \n", | ||
"1 H Hydrogen 1 1 1.007975 \n", | ||
"2 He Helium 18 1 4.002602 \n", | ||
"3 Li Lithium 1 2 6.967500 \n", | ||
"4 Be Beryllium 2 2 9.012183 \n", | ||
"5 B Boron 13 2 10.813500 \n", | ||
"\n", | ||
" atomic_weight_uncert atomic_weight_unit \\\n", | ||
"atomic_number \n", | ||
"1 1.350000e-04 u \n", | ||
"2 2.000000e-06 u \n", | ||
"3 2.950000e-02 u \n", | ||
"4 5.000000e-07 u \n", | ||
"5 7.500000e-03 u \n", | ||
"\n", | ||
" atomic_weight_data_source \n", | ||
"atomic_number \n", | ||
"1 nist \n", | ||
"2 nist \n", | ||
"3 nist \n", | ||
"4 nist \n", | ||
"5 nist \n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Include atomic weights into the dataset\n", | ||
"\n", | ||
"atoms2 = AtomsDataset()\n", | ||
"atoms2.load_sql(session, load_atomic_weights=True)\n", | ||
"print atoms2.data.head(5)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"atomic_number\n", | ||
"1 11.016014\n", | ||
"2 26.020823\n", | ||
"3 58.546056\n", | ||
"4 91.219444\n", | ||
"5 126.931782\n", | ||
"Name: computed_field, dtype: float64\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# It is easy to create new computed fields with pandas dataframes. \n", | ||
"# Special methods of the classes derived from the ``Dataset`` class could be used to create such fields\n", | ||
"\n", | ||
"atoms2.data[\"computed_field\"] = atoms2.data[\"atomic_weight_value\"]**2 + 10\n", | ||
"print atoms2.data[\"computed_field\"].head(5)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# It is also possible to create a dataset manually\n", | ||
"\n", | ||
"from numpy.random import randn\n", | ||
"from pandas import DataFrame\n", | ||
"df = DataFrame(randn(8, 3), columns=['A', 'B', 'C'])\n", | ||
"some_dataset = Dataset(name=\"some\", data=df)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"/home/misha/.virtualenvs/carsus/lib/python2.7/site-packages/ipykernel/__main__.py:2: PerformanceWarning: \n", | ||
"your performance may suffer as PyTables will pickle object types that it cannot\n", | ||
"map directly to c-types [inferred_type->unicode,key->block1_values] [items->['symbol', 'name']]\n", | ||
"\n", | ||
" from ipykernel import kernelapp as app\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# create_hdf() is used to put several datasets into an HDF file. Datasets' names are used as the identifiers in the file.\n", | ||
"\n", | ||
"from carsus.io.output import create_hdf\n", | ||
"create_hdf(\"store.h5\", datasets=[atoms, some_dataset])\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
" symbol name group period\n", | ||
"atomic_number \n", | ||
"1 H Hydrogen 1 1\n", | ||
"2 He Helium 18 1\n", | ||
"3 Li Lithium 1 2\n", | ||
"4 Be Beryllium 2 2\n", | ||
"5 B Boron 13 2\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"from pandas import read_hdf\n", | ||
"atoms = read_hdf(\"store.h5\", \"atoms\")\n", | ||
"print atoms.head(5)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 2", | ||
"language": "python", | ||
"name": "python2" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 2 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython2", | ||
"version": "2.7.10" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |