<a href="https://colab.research.google.com/github/samgitnub/python-Intern/blob/main/Python_Development_Intern.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**1. Automation Script:**

- **Description:** Create a script to automate a repetitive task, such as data
  scraping or file organization. Improve efficiency and reduce manual
  effort.
- **Why:** Automation scripts save time and reduce human error in repetitive
  tasks.

**Task:** Organize the files in a folder by type (e.g., move images, documents, and videos into separate subfolders).



In [1]:
import os
import shutil

# Function to organize files by type
def organize_files_by_type(folder_to_organize):
    """Sort the files directly inside a folder into per-category subfolders.

    Every regular file in ``folder_to_organize`` is moved into a subfolder
    chosen by its (case-insensitive) extension: Images, Documents, Videos,
    Music or Archives. Files with any other extension go to ``Others``,
    which is only created when needed. Existing subfolders are left alone
    and are not descended into.

    A file is never moved on top of an existing one: if the destination
    name is already taken, a numeric suffix such as ``name (1).ext`` is
    appended to the moved file.

    Args:
        folder_to_organize: Path of the folder whose files should be sorted.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    # File type categories and their extensions
    file_types = {
        'Images': ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff'],
        'Documents': ['.pdf', '.docx', '.doc', '.txt', '.xlsx', '.pptx'],
        'Videos': ['.mp4', '.mkv', '.avi', '.mov', '.wmv'],
        'Music': ['.mp3', '.wav', '.aac'],
        'Archives': ['.zip', '.rar', '.7z', '.tar', '.gz']
    }

    # Ensure the target exists and really is a folder; a plain file would
    # otherwise make the os.makedirs calls below fail.
    if not os.path.exists(folder_to_organize):
        print(f"Error: Folder '{folder_to_organize}' does not exist.")
        return
    if not os.path.isdir(folder_to_organize):
        print(f"Error: '{folder_to_organize}' is not a folder.")
        return

    # Reverse lookup so each file needs a single dictionary access.
    category_by_extension = {
        extension: category
        for category, extensions in file_types.items()
        for extension in extensions
    }

    # Create subfolders for each category
    for category in file_types:
        os.makedirs(os.path.join(folder_to_organize, category), exist_ok=True)

    # Iterate over a snapshot of the folder's entries
    for file_name in os.listdir(folder_to_organize):
        file_path = os.path.join(folder_to_organize, file_name)

        # Skip folders (including the category folders created above)
        if os.path.isdir(file_path):
            continue

        file_extension = os.path.splitext(file_name)[1].lower()
        category = category_by_extension.get(file_extension, 'Others')

        target_folder = os.path.join(folder_to_organize, category)
        os.makedirs(target_folder, exist_ok=True)  # creates "Others" on demand

        shutil.move(file_path, _unique_destination(target_folder, file_name))
        print(f"Moved: {file_name} to {category}/")

    print("File organization complete!")


def _unique_destination(target_folder, file_name):
    """Return a path for file_name inside target_folder that is not yet taken."""
    stem, extension = os.path.splitext(file_name)
    candidate = os.path.join(target_folder, file_name)
    counter = 1
    # lexists also treats a dangling symlink as "taken".
    while os.path.lexists(candidate):
        candidate = os.path.join(target_folder, f"{stem} ({counter}){extension}")
        counter += 1
    return candidate

# Example usage
if __name__ == "__main__":
    # Let the target folder be supplied through the FOLDER_TO_ORGANIZE
    # environment variable so the example is not tied to one machine; the
    # original hard-coded path remains the fallback.
    folder_to_organize = os.environ.get(
        "FOLDER_TO_ORGANIZE",
        r"C:\Users\Shaik Sameena\Documents\flower_photos\flower_photos",
    )
    organize_files_by_type(folder_to_organize)

Error: Folder 'C:\Users\Shaik Sameena\Documents\flower_photos\flower_photos' does not exist.


**2. Web Scraper:**

- **Description:** Develop a web scraper to extract and process data from
  websites. Implement techniques to handle dynamic content and avoid
  being blocked.
- **Why:** Web scraping is useful for collecting data from the web for analysis
  and research.

In [2]:
# Environment setup for the scraper: install Selenium and the system
# chromium-chromedriver package (shell commands run via the `!` escape).
!pip install selenium
!apt-get update # to update ubuntu to correctly run apt install
!apt install chromium-chromedriver
# Copy the driver binary into /usr/bin.
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
import os
# NOTE(review): this appends the path of the driver binary itself to PATH;
# PATH entries are normally directories — confirm whether this line is needed.
os.environ['PATH'] += ":/usr/bin/chromedriver"
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup


# Initialize the WebDriver
def init_driver():
    """Build the Chrome WebDriver used by the scraper.

    Returns:
        The ``webdriver.Chrome`` instance created with the flags listed below.
    """
    browser_flags = (
        "--headless",  # no visible browser window
        "--disable-gpu",
        "--no-sandbox",
        "window-size=1920,1080",
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36",
    )

    opts = Options()
    for flag in browser_flags:
        opts.add_argument(flag)

    # No explicit Service object is passed here; only the options are given.
    return webdriver.Chrome(options=opts)
# Scrape job listings from a sample job portal
def scrape_jobs(url, page_load_wait=3, scroll_count=3, scroll_wait=2):
    """Load a job-listing page in a browser and extract its job cards.

    The page is opened with the driver from ``init_driver``, scrolled to the
    bottom a few times so lazily loaded content can appear, and the final
    HTML is parsed with BeautifulSoup. The browser is always shut down,
    even if loading or scrolling raises.

    Args:
        url: Address of the listing page to scrape.
        page_load_wait: Seconds to sleep after the initial page load.
        scroll_count: How many times to scroll to the bottom of the page.
        scroll_wait: Seconds to sleep after each scroll.

    Returns:
        list[dict]: One dict per job card with the keys "Title", "Company"
        and "Location"; a field that is missing on a card is "N/A".
    """
    driver = init_driver()
    try:
        driver.get(url)
        time.sleep(page_load_wait)  # Allow the page to load

        # Scroll the page to load more content (if applicable)
        for _ in range(scroll_count):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(scroll_wait)

        page_html = driver.page_source
    finally:
        # Release the browser process no matter how the block above ended.
        driver.quit()

    soup = BeautifulSoup(page_html, "html.parser")

    def text_or_na(card, tag, css_class):
        """Return the stripped text of the first matching child, or "N/A"."""
        node = card.find(tag, class_=css_class)
        return node.get_text(strip=True) if node else "N/A"

    # Parse job titles, companies, and locations.
    # The tag/class selectors depend on the website's structure.
    jobs = []
    for card in soup.find_all("div", class_="job_seen_beacon"):
        jobs.append({
            "Title": text_or_na(card, "h2", "jobTitle"),
            "Company": text_or_na(card, "span", "companyName"),
            "Location": text_or_na(card, "div", "companyLocation"),
        })

    return jobs

# Save the scraped data to a file
def save_to_csv(data, filename="jobs.csv"):
    """Write job records to a CSV file with a Title/Company/Location header.

    Args:
        data: Iterable of dicts keyed by "Title", "Company" and "Location".
        filename: Destination path; it is opened in write mode as UTF-8.
    """
    import csv

    columns = ["Title", "Company", "Location"]
    with open(filename, mode="w", newline="", encoding="utf-8") as csv_file:
        csv_writer = csv.DictWriter(csv_file, fieldnames=columns)
        csv_writer.writeheader()
        for record in data:
            csv_writer.writerow(record)

# Example usage
if __name__ == "__main__":
    # Search-results URL to scrape (query "software engineer", location "remote").
    url = "https://www.indeed.com/jobs?q=software+engineer&l=remote"
    # Kept in a module-level name; a later cell passes it to save_to_csv.
    scraped_data = scrape_jobs(url)


Collecting selenium
  Downloading selenium-4.26.1-py3-none-any.whl.metadata (7.1 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.27.0-py3-none-any.whl.metadata (8.6 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.11.1-py3-none-any.whl.metadata (4.7 kB)
Collecting sortedcontainers (from trio~=0.17->selenium)
  Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Downloading selenium-4.26.1-py3-none-any.whl (9.7 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.7/9.7 MB 45.0 MB/s eta 0:00:00
Downloading trio-0.27.0-py3-none-any.whl (481 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 481.7/481.7 kB 24.2 MB/s

In [3]:
# Write the scraped listings to a CSV file; `scraped_data` is assigned by the
# scraper cell's example-usage block.
save_to_csv(scraped_data, filename="/content/jobs.csv")


**3. Data Analysis Tool:**

- **Description:** Build a tool to analyse and visualize datasets. Provide
  insights through interactive graphs and charts.
- **Why:** Data analysis tools help in making data-driven decisions by
  visualizing trends and patterns.




In [4]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import dash
from dash import dcc, html
from dash.dependencies import Input, Output

# Create a sample Dash app
app = dash.Dash(__name__)

# Load a sample dataset (you can replace this with your dataset).
# `df` is read as a module-level global by the callbacks defined below.
# NOTE(review): `url` reuses the name assigned in the scraper cell's example.
url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv"
df = pd.read_csv(url)

# Basic Data Insights
def get_data_insights(df):
    """Collect a structural overview and summary statistics for a DataFrame.

    ``DataFrame.info()`` writes its report to a stream and returns ``None``,
    so the report is captured from an in-memory text buffer instead of
    using the call's return value.

    Args:
        df: The pandas DataFrame to describe.

    Returns:
        tuple: ``(info, summary)`` where ``info`` is the text report produced
        by ``df.info()`` (a ``str``) and ``summary`` is the DataFrame
        returned by ``df.describe()``.
    """
    import io  # local import: only this function needs it

    info_buffer = io.StringIO()
    df.info(buf=info_buffer)
    info = info_buffer.getvalue()

    summary = df.describe()
    return info, summary

# Create an interactive scatter plot using Plotly
def create_scatter_plot(df):
    """Plot tip against total bill, coloured by the ``sex`` column.

    Args:
        df: DataFrame providing the ``total_bill``, ``tip`` and ``sex`` columns.

    Returns:
        The figure object returned by ``px.scatter``.
    """
    plot_spec = dict(
        x="total_bill",
        y="tip",
        color="sex",
        title="Scatter Plot: Total Bill vs Tip",
    )
    return px.scatter(df, **plot_spec)

# Create an interactive bar chart using Plotly
def create_bar_chart(df):
    """Plot total bill per day as bars, coloured by the ``sex`` column.

    Args:
        df: DataFrame providing the ``day``, ``total_bill`` and ``sex`` columns.

    Returns:
        The figure object returned by ``px.bar``.
    """
    plot_spec = dict(
        x="day",
        y="total_bill",
        color="sex",
        title="Bar Chart: Total Bill by Day and Sex",
    )
    return px.bar(df, **plot_spec)

# Layout for the Dash app
app.layout = html.Div([
    # Page title
    html.H1("Interactive Data Analysis Tool", style={'textAlign': 'center'}),

    # Data Insights Section
    # The two <pre> elements start empty; the `update_insights` callback
    # targets their "data-info" and "data-summary" ids.
    html.Div([
        html.H3("Basic Data Insights:"),
        html.Pre(id="data-info"),
        html.Pre(id="data-summary"),
    ], style={'padding': 20}),

    # Graphs Section
    # Each graph's `figure` is supplied by a callback keyed on the ids below.
    html.Div([
        html.H3("Interactive Graphs:"),

        dcc.Graph(id='scatter-plot'),
        dcc.Graph(id='bar-chart'),
    ], style={'padding': 20}),
])

# Callback to update the data insights
@app.callback(
    [Output("data-info", "children"), Output("data-summary", "children")],
    Input("scatter-plot", "id")
)
def update_insights(_):
    """Fill the two insight <pre> elements.

    The scatter plot's ``id`` is used as the callback input; its value is
    ignored.

    Returns:
        tuple: ``(info, summary_text)``. ``info`` is passed through exactly as
        ``get_data_insights`` returns it; ``summary_text`` is the summary
        table rendered as plain text.
    """
    info, summary = get_data_insights(df)
    # A DataFrame cannot be serialized as a component's `children`, so the
    # summary table is converted to text before it is returned.
    summary_text = summary.to_string()
    return info, summary_text

# Callback to update the scatter plot
@app.callback(Output('scatter-plot', 'figure'), Input('scatter-plot', 'id'))
def update_scatter_plot(_):
    """Supply the scatter plot's figure.

    The graph's own ``id`` is used as the callback input; its value is ignored.
    """
    scatter_figure = create_scatter_plot(df)
    return scatter_figure

# Callback to update the bar chart
@app.callback(Output('bar-chart', 'figure'), Input('bar-chart', 'id'))
def update_bar_chart(_):
    """Supply the bar chart's figure.

    The graph's own ``id`` is used as the callback input; its value is ignored.
    """
    bar_figure = create_bar_chart(df)
    return bar_figure

if __name__ == "__main__":
    # `run_server` is the legacy name for starting the development server and
    # is not available in current Dash releases. Prefer `app.run` when the
    # installed version has it, and fall back to `run_server` otherwise.
    start_server = getattr(app, "run", None) or app.run_server
    start_server(debug=True)


ModuleNotFoundError: No module named 'dash'

In [None]:
# NOTE(review): the preceding cell imports `dash` and its recorded output is a
# ModuleNotFoundError; this install has to run before that cell.
!pip install dash

**4. RESTful API:**

- **Description:** Implement a RESTful API using Flask or Django for a
  specific service. Enable CRUD operations and ensure secure access.
- **Why:** Building a RESTful API provides experience with backend
  development and service integration.
- **Tasks:**
  - Set up the project with Flask or Django.
  - Implement CRUD operations for the service.

In [None]:
# Install Flask and the extensions for the API cells below.
# NOTE(review): flask-restful is installed but not imported in the cells shown
# — confirm it is needed.
pip install flask flask-restful flask-sqlalchemy


In [None]:
from flask import Flask, Blueprint
from flask_sqlalchemy import SQLAlchemy

# Application object and its SQLite-backed SQLAlchemy handle.
app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///example.db'
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

db = SQLAlchemy(app)

from flask import Blueprint

# Blueprint for the API routes, registered on the app right away.
# NOTE(review): no routes are attached to this blueprint in the cells shown.
api_blueprint = Blueprint('api', __name__)
app.register_blueprint(api_blueprint)

if __name__ == "__main__":
    # debug=True also turns on the auto-reloader, which restarts the Python
    # process and does not work inside a notebook kernel. Keep debug mode but
    # switch the reloader off.
    app.run(debug=True, use_reloader=False)

In [None]:
# models.py
from __main__ import db  # Import db directly from the main script/session

class Task(db.Model):
    """Database model for a single task."""

    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.String(100), nullable=False)  # required
    description = db.Column(db.String(200))
    done = db.Column(db.Boolean, default=False)

    def to_dict(self):
        """Return the task's id, title, description and done flag as a dict."""
        field_names = ('id', 'title', 'description', 'done')
        return {name: getattr(self, name) for name in field_names}

In [None]:
from flask import Flask, Blueprint, abort, request # Import request here
from flask_sqlalchemy import SQLAlchemy

# NOTE(review): this rebinds `app` and `db` to new objects. The Task model in
# the earlier cell subclasses the previous `db.Model`, and the blueprint was
# registered on the previous `app` — confirm this re-creation is intended.
app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///example.db'
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

db = SQLAlchemy(app)

In [None]:
# Example request: POST a JSON body with a task title to /tasks at 127.0.0.1:5000.
# NOTE(review): no /tasks route is defined in the cells shown — confirm where
# it is registered.
!curl -X POST -H "Content-Type: application/json" -d '{"title": "Test Task"}' http://127.0.0.1:5000/tasks


In [None]:
# Example request: GET /tasks from the server at 127.0.0.1:5000.
!curl http://127.0.0.1:5000/tasks