Skip to content

Commit

Permalink
first commit for the output module
Browse files Browse the repository at this point in the history
  • Loading branch information
mishinma committed May 3, 2016
1 parent cc9d02a commit 6c245df
Show file tree
Hide file tree
Showing 2 changed files with 309 additions and 0 deletions.
65 changes: 65 additions & 0 deletions carsus/io/output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from carsus.model import Atom, AtomicWeight, DataSource
from pandas import read_sql_query, HDFStore


class Dataset(object):

def __init__(self, name, data=None):
self.name = name
self._data = data

@property
def data(self):
if self._data is None:
raise ValueError('Data is not available please load first')
else:
return self._data

@data.setter
def data(self, value):
self._data = value


class AtomsDataset(Dataset):

def __init__(self, name="atoms", data=None):
super(AtomsDataset, self).__init__(name, data)

def load_sql(self, session, load_atomic_weights=False):
q = session.query(Atom.atomic_number.label("atomic_number"),
Atom.symbol.label("symbol"),
Atom.name.label("name"),
Atom.group.label("group"),
Atom.period.label("period"),
)
if load_atomic_weights:

q = q.add_columns(AtomicWeight.value.label("atomic_weight_value"),
AtomicWeight.std_dev.label("atomic_weight_uncert"),
AtomicWeight.unit.label("atomic_weight_unit"),
DataSource.short_name.label("atomic_weight_data_source")).\
join(Atom.quantities.of_type(AtomicWeight)).\
join(AtomicWeight.data_source)

df = read_sql_query(q.selectable, session.bind, index_col="atomic_number")
self.data = df


def create_hdf(fname, datasets):
"""
Store datasets in a HDF file
Parameters
----------
fname : str
The name of the HDF file
datasets : list of Dataset objects
The datasets that will be put in the HDF file
"""
with HDFStore(fname, "w") as store:
for dataset in datasets:
store[dataset.name] = dataset.data


244 changes: 244 additions & 0 deletions notebooks/Output module.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Initializing the database\n",
"Ingesting basic atomic data\n"
]
}
],
"source": [
"from carsus import init_db\n",
"session = init_db(\"sqlite://\")\n",
"session.commit()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading the data from http://physics.nist.gov/cgi-bin/Compositions/stand_alone.pl\n",
"Ingesting atomic weights\n"
]
}
],
"source": [
"from carsus.io.nist import NISTWeightsCompIngester\n",
"ingester = NISTWeightsCompIngester()\n",
"ingester.download()\n",
"ingester.ingest(session)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" symbol name group period\n",
"atomic_number \n",
"1 H Hydrogen 1 1\n",
"2 He Helium 18 1\n",
"3 Li Lithium 1 2\n",
"4 Be Beryllium 2 2\n",
"5 B Boron 13 2\n"
]
}
],
"source": [
"from carsus.io.output import AtomsDataset, Dataset\n",
"\n",
"# Create a dataset and load with the atoms data from the database. \n",
"\n",
"atoms = AtomsDataset()\n",
"atoms.load_sql(session)\n",
"print atoms.data.head(5)\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" symbol name group period atomic_weight_value \\\n",
"atomic_number \n",
"1 H Hydrogen 1 1 1.007975 \n",
"2 He Helium 18 1 4.002602 \n",
"3 Li Lithium 1 2 6.967500 \n",
"4 Be Beryllium 2 2 9.012183 \n",
"5 B Boron 13 2 10.813500 \n",
"\n",
" atomic_weight_uncert atomic_weight_unit \\\n",
"atomic_number \n",
"1 1.350000e-04 u \n",
"2 2.000000e-06 u \n",
"3 2.950000e-02 u \n",
"4 5.000000e-07 u \n",
"5 7.500000e-03 u \n",
"\n",
" atomic_weight_data_source \n",
"atomic_number \n",
"1 nist \n",
"2 nist \n",
"3 nist \n",
"4 nist \n",
"5 nist \n"
]
}
],
"source": [
"# Include atomic weights into the dataset\n",
"\n",
"atoms2 = AtomsDataset()\n",
"atoms2.load_sql(session, load_atomic_weights=True)\n",
"print atoms2.data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"atomic_number\n",
"1 11.016014\n",
"2 26.020823\n",
"3 58.546056\n",
"4 91.219444\n",
"5 126.931782\n",
"Name: computed_field, dtype: float64\n"
]
}
],
"source": [
"# It is easy to create new computed fields with pandas dataframes. \n",
"# Special methods of the classes derived from the ``Dataset`` class could be used to create such fields\n",
"\n",
"atoms2.data[\"computed_field\"] = atoms2.data[\"atomic_weight_value\"]**2 + 10\n",
"print atoms2.data[\"computed_field\"].head(5)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# It is also possible to create a dataset manually\n",
"\n",
"from numpy.random import randn\n",
"from pandas import DataFrame\n",
"df = DataFrame(randn(8, 3), columns=['A', 'B', 'C'])\n",
"some_dataset = Dataset(name=\"some\", data=df)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/misha/.virtualenvs/carsus/lib/python2.7/site-packages/ipykernel/__main__.py:2: PerformanceWarning: \n",
"your performance may suffer as PyTables will pickle object types that it cannot\n",
"map directly to c-types [inferred_type->unicode,key->block1_values] [items->['symbol', 'name']]\n",
"\n",
" from ipykernel import kernelapp as app\n"
]
}
],
"source": [
"# create_hdf() is used to put several datasets into an HDF file. Datasets' names are used as the identifiers in the file.\n",
"\n",
"from carsus.io.output import create_hdf\n",
"create_hdf(\"store.h5\", datasets=[atoms, some_dataset])\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" symbol name group period\n",
"atomic_number \n",
"1 H Hydrogen 1 1\n",
"2 He Helium 18 1\n",
"3 Li Lithium 1 2\n",
"4 Be Beryllium 2 2\n",
"5 B Boron 13 2\n"
]
}
],
"source": [
"from pandas import read_hdf\n",
"atoms = read_hdf(\"store.h5\", \"atoms\")\n",
"print atoms.head(5)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

0 comments on commit 6c245df

Please sign in to comment.