# Inferring Photometric Redshifts from Multichannel Images
## Balázs Menkó (O67UT7)
### Supervisor: Pál, Balázs 

---

# Create Database

In [1]:
# Target number of records (one image each) in the `redshift` table; the
# download/upload cells below only run while the table holds fewer rows.
NUM_IMAGES = 10000 # maximum=19589
# Number of object IDs sliced from the ID list per query/upload round.
UPLOAD_BATCH = 50

# NOTE(review): the wildcard import presumably supplies every helper and
# library used below (pd, SDSS, tqdm, clear_output, run_query,
# connect_from_settings, pgsql_settings, get_sdss_image, IMG_SIZE) —
# none of them is defined in this notebook; confirm against utils.py.
from utils import *

# Create `redshift` table 

```python
# Open a database connection and create the `redshift` table if it is missing.
connection = connect_from_settings(pgsql_settings)
cursor = connection.cursor()
# One row per object: an integer `index` as primary key, the parameters
# queried from SpecObj, and the image stored as an integer array (`picture`).
cursor.execute("""
    CREATE TABLE IF NOT EXISTS redshift (
        index INT PRIMARY KEY,
        specObjID BIGINT,
        ra FLOAT,
        dec FLOAT,
        z FLOAT,
        zErr FLOAT,
        velDisp FLOAT,
        velDispErr FLOAT,
        picture INT[]);
""")
# Commit the statement, then release the cursor and the connection.
connection.commit()
cursor.close()
connection.close()
```

In [2]:
### Find the last object ID which has been uploaded to the database

# The number of rows already stored is used as the offset into the ID list.
# On the first run the count query fails (presumably because the `redshift`
# table does not exist yet); in that case start from the beginning.
try:
    MAX_IDX = int(run_query("SELECT COUNT(index) FROM redshift").iloc[0,0])
except Exception as exc:  # not a bare `except`, so Ctrl-C still interrupts
    # Report the reason instead of hiding it: a connection problem would
    # otherwise be indistinguishable from an empty database.
    print(f'Could not count rows in `redshift` ({exc!r}); starting from 0.')
    MAX_IDX = 0
print(f'MAX_IDX={MAX_IDX}')

# Single-column CSV without a header row; the column is named `ids` here.
specObjID_csv = pd.read_csv('specObjIDs.csv', names=['ids'])

# Next batch of IDs to process, starting right after the rows already stored.
ids_to_save = tuple(specObjID_csv['ids'][MAX_IDX:MAX_IDX+UPLOAD_BATCH].tolist())

MAX_IDX=10000


## Query object ID, right ascension, declination and redshift parameter ($z$)

In [3]:
# Fetch the SpecObj parameters for the current batch of object IDs.
if not ids_to_save:
    # An empty batch would render as "IN ()", which is not valid SQL.
    raise ValueError('ids_to_save is empty: no object IDs left to query.')

# Build the IN list element by element: interpolating the tuple directly
# renders a one-element batch as "(id,)", whose trailing comma is a syntax
# error. `repr` reproduces exactly what the tuple interpolation printed for
# each element, so multi-element batches produce the same query as before.
id_list = ', '.join(map(repr, ids_to_save))
results = SDSS.query_sql(f"""
    SELECT specobjid As specObjID, ra, dec, z, zErr, velDisp, velDispErr
    FROM SpecObj
    WHERE specobjid IN ({id_list})
""")
# Convert the query result to a pandas DataFrame for row-wise processing.
df = results.to_pandas()

## Download images for given coordinate pairs

In [4]:
# Only download when the table still holds fewer than NUM_IMAGES records.
if MAX_IDX < NUM_IMAGES:
    # One tuple per object, in the column order of the INSERT statement used
    # for the upload: running index (offset by the rows already stored), the
    # queried parameters (velDisp and velDispErr divided by 1000), and the
    # image fetched for the object's coordinates.
    data = [
        (index+MAX_IDX, row['specObjID'], row['ra'], row['dec'],
         row['z'], row['zErr'], row['velDisp']/1000, row['velDispErr']/1000,
         get_sdss_image(row['ra'], row['dec'], size=IMG_SIZE)
        )
        # Size the progress bar by the rows actually returned: the query may
        # yield fewer than UPLOAD_BATCH rows, and the bar would never complete.
        for index, row in tqdm(df.iterrows(), total=len(df), desc="Processing")
    ]

## Upload data to `postgres-datasci.db-test` server

In [5]:
# Only upload when the table still holds fewer than NUM_IMAGES records
# (the same guard as the download cell, which is what defines `data`).
if MAX_IDX < NUM_IMAGES:
    connection = connect_from_settings(pgsql_settings)
    try:
        cursor = connection.cursor()
        try:
            # Rows whose `index` already exists are skipped, so re-running
            # the cell does not fail on the primary key.
            cursor.executemany("""
        INSERT INTO redshift (index, specObjID, ra, dec, z, zErr, velDisp, velDispErr, picture)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT (index) DO NOTHING;
    """, data)
            connection.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even if the insert or commit failed.
        connection.close()

# Combined downloading and uploading

In [6]:
# Repeat query -> download -> upload in batches of UPLOAD_BATCH until the
# `redshift` table holds NUM_IMAGES records. The stored row count is re-read
# after every round and used as the offset into the ID list.
# NOTE(review): because the offset is the row count, a round in which SDSS
# returns fewer rows than requested makes the next slice overlap the previous
# one — confirm whether duplicate specObjIDs are acceptable.
MAX_IDX = int(run_query("SELECT COUNT(index) FROM redshift").iloc[0,0])
while MAX_IDX < NUM_IMAGES:
    clear_output(wait=True)  # Clear the output
    print(f'{MAX_IDX}/{NUM_IMAGES} records have been added to the database yet.', end='\r')
    ## Get the next UPLOAD_BATCH ids
    ids_to_save = tuple(specObjID_csv['ids'][MAX_IDX:MAX_IDX+UPLOAD_BATCH].tolist())
    if not ids_to_save:
        # The ID list is exhausted; "IN ()" would not be valid SQL anyway.
        break

    ## Get the next datapoints from SDSS dataset
    # Select every column the upload needs (the comprehension below reads
    # zErr, velDisp and velDispErr). The IN list is built per element because
    # interpolating a one-element tuple yields "(id,)", which is invalid SQL.
    id_list = ', '.join(map(repr, ids_to_save))
    results = SDSS.query_sql(f"""
        SELECT specobjid As specObjID, ra, dec, z, zErr, velDisp, velDispErr
        FROM SpecObj
        WHERE specobjid IN ({id_list})
    """)
    # NOTE(review): some astroquery versions appear to return None when the
    # query matches nothing — confirm for the installed version.
    if results is None or len(results) == 0:
        raise RuntimeError(f'SDSS returned no rows for the batch starting at offset {MAX_IDX}.')
    # Convert THIS round's result; iterating a DataFrame left over from an
    # earlier cell would upload the same objects again under new indices.
    batch_df = results.to_pandas()

    ## Generate a list for uploading data
    data = [
        (index+MAX_IDX, row['specObjID'], row['ra'], row['dec'],
        row['z'], row['zErr'], row['velDisp']/1000, row['velDispErr']/1000,
        get_sdss_image(row['ra'], row['dec'], size=IMG_SIZE)
        )
        for index, row in tqdm(batch_df.iterrows(), total=len(batch_df), desc="Processing")
    ]

    ## Uploading data into the database
    connection = connect_from_settings(pgsql_settings)
    try:
        cursor = connection.cursor()
        try:
            cursor.executemany("""
        INSERT INTO redshift (index, specObjID, ra, dec, z, zErr, velDisp, velDispErr, picture)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT (index) DO NOTHING;
    """, data)
            connection.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even if the insert or commit failed.
        connection.close()

    ### Reset MAX_IDX
    rows_before = MAX_IDX
    MAX_IDX = int(run_query("SELECT COUNT(index) FROM redshift").iloc[0,0])
    if MAX_IDX <= rows_before:
        # Nothing was inserted, so the next round would request exactly the
        # same IDs again; fail loudly instead of looping forever.
        raise RuntimeError(f'No new records were stored for the batch starting at offset {rows_before}.')

    ### End of the loop

clear_output(wait=True)  # Clear the output
print(f'{MAX_IDX}/{NUM_IMAGES} records have been added to the database yet.', end='\r')

10000/10000 records have been added to the database yet.