In [1]:
import os
import glob
import shutil
import pandas as pd

In [None]:
# Move every CSV found in csv_files/ into backup_folder/.
print(os.getcwd())
backup_dir = "backup_folder"
# shutil.move only moves *into* a directory that already exists; on a fresh
# checkout the folder is missing and the move would fail, so create it first.
os.makedirs(backup_dir, exist_ok=True)
csv_files = glob.glob("csv_files/*.csv")
for file in csv_files:
    shutil.move(file, backup_dir)
    print(f"Moved file: {file}")

# Automating Export
def export_data(df, filename, format):
    """Write ``df`` to ``filename`` as CSV or JSON and print a confirmation.

    Args:
        df: DataFrame to export.
        filename: Destination path handed to the pandas writer.
        format: ``"csv"`` (written without the index) or ``"json"``
            (written with ``orient="records"``). Any other value only
            prints ``Unsupported format.`` and writes nothing.

    Returns:
        None.
    """
    if format == "csv":
        df.to_csv(filename, index=False)
        print(f"Data exported to {filename} in CSV format.")
        return

    if format == "json":
        df.to_json(filename, orient="records")
        print(f"Data exported to {filename} in JSON format.")
        return

    # Anything else is reported rather than raised.
    print("Unsupported format.")


d:\Work\PlusW\Lecture_5
Moved file: books.csv
Moved file: pubmed_articles.csv


In [5]:
# Example usage: build a small sample dataframe and export it once per
# supported format.
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Los Angeles', 'Chicago'],
}
df = pd.DataFrame(data)
# CSV first, then JSON.
for target, fmt in (("output.csv", "csv"), ("output.json", "json")):
    export_data(df, target, fmt)

Data exported to output.csv in CSV format.
Data exported to output.json in JSON format.


In [10]:
%pip install yfinance
import yfinance as yf
import sqlite3
import pandas as pd
import time

Collecting yfinance
  Downloading yfinance-0.2.54-py2.py3-none-any.whl (108 kB)
     ---------------------------------------- 0.0/108.7 kB ? eta -:--:--
     ----------- --------------------------- 30.7/108.7 kB 1.3 MB/s eta 0:00:01
     --------------------------------- ----- 92.2/108.7 kB 1.1 MB/s eta 0:00:01
     --------------------------------- -- 102.4/108.7 kB 837.8 kB/s eta 0:00:01
     ------------------------------------ 108.7/108.7 kB 787.7 kB/s eta 0:00:00
Collecting frozendict>=2.3.4
  Downloading frozendict-2.4.6-cp310-cp310-win_amd64.whl (37 kB)
Collecting requests>=2.31
  Using cached requests-2.32.3-py3-none-any.whl (64 kB)
Collecting beautifulsoup4>=4.11.1
  Downloading beautifulsoup4-4.13.3-py3-none-any.whl (186 kB)
     ---------------------------------------- 0.0/186.0 kB ? eta -:--:--
     ------------------- ------------------- 92.2/186.0 kB 2.6 MB/s eta 0:00:01
     -------------------------------------  184.3/186.0 kB 2.2 MB/s eta 0:00:01
     -----------------


[notice] A new release of pip is available: 23.0.1 -> 25.0.1
[notice] To update, run: C:\Users\mrg18\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [13]:
# Database setup: open (or create) the SQLite file and make sure the
# stock_data table exists before any function below touches it.
db_name = "stocks.db"
create_table_sql = '''CREATE TABLE IF NOT EXISTS stock_data (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                symbol TEXT,
                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                open REAL,
                high REAL,
                low REAL,
                close REAL,
                volume INTEGER)'''

conn = sqlite3.connect(db_name)
# Module-level cursor shared by the insert helper defined further down.
cursor = conn.cursor()
cursor.execute(create_table_sql)
conn.commit()

# Function to fetch stock data

def fetch_stock_data(symbol):
    """Return the most recent price row for ``symbol`` as a dict.

    Requests ``history(period="1d", interval="1m")`` from ``yf.Ticker`` and
    keeps only the last row.

    Args:
        symbol: Ticker symbol passed to ``yf.Ticker``.

    Returns:
        A dict with keys ``symbol``, ``open``, ``high``, ``low``, ``close``
        and ``volume``; or ``None`` when the history is empty or any
        exception is raised (a message is printed in both cases).
    """
    # (result key, history column) pairs, in the order they are emitted.
    field_columns = (
        ("open", "Open"),
        ("high", "High"),
        ("low", "Low"),
        ("close", "Close"),
        ("volume", "Volume"),
    )
    try:
        history = yf.Ticker(symbol).history(period="1d", interval="1m")
        if not history.empty:
            newest = history.iloc[-1]  # last row = most recent price data
            record = {"symbol": symbol}
            for key, column in field_columns:
                record[key] = newest[column]
            return record
        print(f"No data found for {symbol}. Skipping...")
        return None
    except Exception as e:
        # Best-effort fetch: report the problem and let the caller skip.
        print(f"Error fetching data for {symbol}: {e}")
        return None
    
# Function to store data in SQLite

def store_data(symbol):
    """Fetch the latest price row for ``symbol`` and insert it into SQLite.

    Uses the module-level ``cursor`` / ``conn`` and the ``stock_data`` table.
    When ``fetch_stock_data`` returns nothing, no row is inserted, nothing is
    committed and no "Stored" message is printed.

    Args:
        symbol: Ticker symbol to fetch and store.

    Returns:
        None.
    """
    stock_data = fetch_stock_data(symbol)
    if not stock_data:
        # Nothing fetched: skip the insert instead of reporting a store that
        # never happened.
        return

    cursor.execute('''INSERT INTO stock_data (symbol, open, high, low,
                        close, volume)
                        VALUES (?, ?, ?, ?, ?, ?)''',
                   (stock_data["symbol"], stock_data["open"],
                    stock_data["high"],
                    stock_data["low"], stock_data["close"],
                    stock_data["volume"]))
    # Commit and confirm only after a row was actually written.
    conn.commit()
    print(f"Stored data for {symbol}")
    
# Function to analyze stock data
def analyze_stock(symbol):
    """Print up to 100 of the newest stored rows for ``symbol``.

    Reads from the ``stock_data`` table through the module-level ``conn``,
    ordered by ``timestamp`` descending, and prints the resulting DataFrame.

    Args:
        symbol: Ticker symbol bound to the query's ``?`` placeholder.

    Returns:
        None.
    """
    query = "SELECT * FROM stock_data WHERE symbol=? ORDER BY timestamp DESC LIMIT 100"
    recent_rows = pd.read_sql_query(query, conn, params=(symbol,))
    print(recent_rows)

# Example usage: sample the latest price a few times, one fetch per interval,
# then print what has been stored for the symbol.
symbol = "NVDA" # NVIDIA
num_fetches = 5              # how many samples to collect
fetch_interval_seconds = 60  # pause between consecutive samples

try:
    for attempt in range(num_fetches):
        store_data(symbol)
        # Only wait *between* fetches; sleeping after the last one just
        # delays the analysis by a full interval.
        if attempt < num_fetches - 1:
            time.sleep(fetch_interval_seconds)

    analyze_stock(symbol)
finally:
    # Close database connection even if a fetch or the analysis raised.
    conn.close()

Stored data for NVDA
Stored data for NVDA
Stored data for NVDA
Stored data for NVDA
Stored data for NVDA
   id symbol            timestamp        open        high         low  \
0  10   NVDA  2025-03-20 21:40:15  118.419998  118.529999  118.309998   
1   9   NVDA  2025-03-20 21:39:14  118.419998  118.529999  118.309998   
2   8   NVDA  2025-03-20 21:38:14  118.419998  118.529999  118.309998   
3   7   NVDA  2025-03-20 21:37:14  118.419998  118.529999  118.309998   
4   6   NVDA  2025-03-20 21:36:13  118.419998  118.529999  118.309998   

        close   volume  
0  118.470001  3024760  
1  118.470001  3024760  
2  118.470001  3024760  
3  118.470001  3024760  
4  118.470001  3024760  


In [12]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [15]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Base URL of the PubMed search page requested by get_pubmed_articles.
BASE_URL = "https://pubmed.ncbi.nlm.nih.gov/"
# Search term passed to get_pubmed_articles as its `query` argument.
search_query = "genomics"
# Number of result pages to request (pages are requested starting from 1).
num_pages = 1

def _tag_text(tag, fallback):
    """Return the stripped text of a parsed tag, or ``fallback`` if it is missing."""
    return tag.get_text(strip=True) if tag else fallback


def get_pubmed_articles(query, pages, timeout=10):
    """Scrape article titles and snippets from PubMed search result pages.

    Args:
        query: Search term. Sent as the ``term`` query parameter and
            URL-encoded by ``requests``, so spaces and ``&`` are safe.
        pages: Number of result pages to fetch, starting at page 1. A value
            of 0 or less fetches nothing.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of ``{"Title": ..., "Summary": ...}`` dicts, one per
        ``<article class="full-docsum">`` element found. Missing pieces are
        filled with ``"No title"`` / ``"No summary"``.

    Raises:
        requests.HTTPError: If a page responds with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    articles_list = []
    for page in range(1, pages + 1):
        response = requests.get(
            BASE_URL,
            # Let requests build and encode the query string.
            params={"term": query, "page": page},
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=timeout,
        )
        # Fail loudly on 4xx/5xx instead of parsing an error page into an
        # empty result set.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        for article in soup.find_all("article", class_="full-docsum"):
            title_tag = article.find("a", class_="docsum-title")
            summary_tag = article.find("div", class_="full-view-snippet")
            articles_list.append({
                "Title": _tag_text(title_tag, "No title"),
                "Summary": _tag_text(summary_tag, "No summary"),
            })

    return articles_list

# Run the scraper with the configured query and page count, then persist the
# collected records as a CSV file without the index column.
articles_data = get_pubmed_articles(query=search_query, pages=num_pages)
df = pd.DataFrame(articles_data)
df.to_csv(path_or_buf="pubmed_articles.csv", index=False)
print("Data saved to pubmed_articles.csv")


Data saved to pubmed_articles.csv
