# Useful example of saving a large query result directly into a csv file

**By "large", I mean larger than the RAM available on your computer.**

In [None]:
import pyodbc
from getpass import getpass
import pandas as pd

# Ask for the database credentials interactively; getpass() does not echo
# what is typed, so neither value is displayed or stored in the notebook.
userid = getpass('Enter user ID: ')
pw = getpass('Enter password: ')

In [None]:
# Build the ODBC connection string from the DSN plus the credentials
# collected with getpass().
cnxn_string = 'DSN=YOUR_DSN;UID=' + userid + ';PWD=' + pw

cnxn = pyodbc.connect(cnxn_string)
cursor = cnxn.cursor()

# Template query: replace the column, schema, and table placeholders with
# real names before running.
sql = """
SELECT
    COLUMN1,
    COLUMN2,
    .
    .
    .
    COLUMN_n

FROM
    SCHEMA.TABLE1 as TABLE1

    INNER JOIN SCHEMA.TABLE2 as TABLE2 ON
        TABLE1.COMMON_KEY = TABLE2.COMMON_KEY

    INNER JOIN SCHEMA.TABLE3 as TABLE3 ON
        TABLE1.COMMON_KEY = TABLE3.COMMON_KEY

WHERE
    TABLE2.SOME_COLUMN = 'YOUR_CAR'
    AND TABLE1.SOME_DATE_COLUMN between '2008-01-01' and '2008-03-31'

ORDER BY
    TABLE3.SOME_COLUMN_TO_SORT_ON
"""

# For a result set that fits in memory, a single call is enough:
# df = pd.read_sql(sql, cnxn, index_col=None)

try:
    # For a large result set (> RAM) that you want to save directly into a
    # csv file: with chunksize set, read_sql() yields DataFrames of at most
    # 10000 rows each, so only one chunk is held in memory at a time.
    for c in pd.read_sql(sql, cnxn, chunksize=10000):
        # mode='a' appends every chunk to the same file, and header=False
        # means no column-name row is ever written. Because of the append
        # mode, an existing file from an earlier run is extended, not
        # replaced -- delete it first if you want a fresh export.
        c.to_csv(r'D:\temp\lots_of_data.csv', index=False, mode='a', header=False)
finally:
    # Close connections even if the query or the file write fails, so the
    # database session is not left open.
    try:
        cursor.close()
    finally:
        cnxn.close()

### Documentation on read_sql() and to_csv() functions:

In [2]:
# Display the built-in documentation for pandas.read_sql (note the chunksize parameter).
help(pd.read_sql)

Help on function read_sql in module pandas.io.sql:

read_sql(sql, con, index_col=None, coerce_float=True, params=None, parse_dates=None, columns=None, chunksize=None)
    Read SQL query or database table into a DataFrame.
    
    Parameters
    ----------
    sql : string SQL query or SQLAlchemy Selectable (select or text object)
        to be executed, or database table name.
    con : SQLAlchemy connectable(engine/connection) or database string URI
        or DBAPI2 connection (fallback mode)
        Using SQLAlchemy makes it possible to use any DB supported by that
        library.
        If a DBAPI2 object, only sqlite3 is supported.
    index_col : string or list of strings, optional, default: None
        Column(s) to set as index(MultiIndex)
    coerce_float : boolean, default True
        Attempt to convert values of non-string, non-numeric objects (like
        decimal.Decimal) to floating point, useful for SQL result sets
    params : list, tuple or dict, optional, default:

In [4]:
# Display the built-in documentation for DataFrame.to_csv (note the mode and header parameters).
help(pd.DataFrame.to_csv)

Help on function to_csv in module pandas.core.frame:

to_csv(self, path_or_buf=None, sep=',', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, mode='w', encoding=None, compression=None, quoting=None, quotechar='"', line_terminator='\n', chunksize=None, tupleize_cols=False, date_format=None, doublequote=True, escapechar=None, decimal='.')
    Write DataFrame to a comma-separated values (csv) file
    
    Parameters
    ----------
    path_or_buf : string or file handle, default None
        File path or object, if None is provided the result is returned as
        a string.
    sep : character, default ','
        Field delimiter for the output file.
    na_rep : string, default ''
        Missing data representation
    float_format : string, default None
        Format string for floating point numbers
    columns : sequence, optional
        Columns to write
    header : boolean or list of string, default True
        Write out column names. If 