Skip to content

Commit

Permalink
feat: improve purl detection
Browse files Browse the repository at this point in the history
  • Loading branch information
louib committed Oct 2, 2023
1 parent 8bb1757 commit b2d4689
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 39 deletions.
32 changes: 16 additions & 16 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,22 +50,6 @@ fn main() -> Result<std::process::ExitCode, Box<dyn std::error::Error>> {
logger::init();
let args = NixToSBOM::parse();

let mut derivations: crate::nix::Derivations = crate::nix::Derivations::default();
if let Some(file_path) = args.file_path {
log::info!("Getting the derivations from file {}.", &file_path);
derivations = nix::Derivation::get_derivations(&file_path)?;
} else if args.current_system {
log::info!("Getting the derivations from the current system.");
derivations = nix::Derivation::get_derivations_for_current_system()?;
} else {
eprintln!("Error: Must provide a file or use the --curent-system argument.");
return Ok(std::process::ExitCode::FAILURE);
}
log::info!("Found {} derivations", derivations.len());

let packages = crate::nix::get_packages(args.metadata_path)?;
log::debug!("Found {} packages in the Nix store", packages.len());

let output_format = match args.format {
Some(f) => match crate::sbom::Format::from_string(&f) {
Some(f) => f,
Expand All @@ -88,6 +72,22 @@ fn main() -> Result<std::process::ExitCode, Box<dyn std::error::Error>> {
None => output_format.get_default_serialization_format(),
};

let mut derivations: crate::nix::Derivations = crate::nix::Derivations::default();
if let Some(file_path) = args.file_path {
log::info!("Getting the derivations from {}", &file_path);
derivations = nix::Derivation::get_derivations(&file_path)?;
} else if args.current_system {
log::info!("Getting the derivations from the current system");
derivations = nix::Derivation::get_derivations_for_current_system()?;
} else {
eprintln!("Error: Must provide a file or use the --curent-system argument");
return Ok(std::process::ExitCode::FAILURE);
}
log::info!("Found {} derivations", derivations.len());

let packages = crate::nix::get_packages(args.metadata_path)?;
log::debug!("Found {} packages in the Nix store", packages.len());

log::info!("Building the package graph");
let package_graph = crate::nix::get_package_graph(&derivations, &packages);
log::info!("{} nodes in the package graph", package_graph.len());
Expand Down
79 changes: 56 additions & 23 deletions src/nix.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::collections::{BTreeSet, HashMap, HashSet};
use std::error::Error;

use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::fs;
use std::io::Error;
use std::process::Command;

use serde::{Deserialize, Deserializer, Serialize};
Expand Down Expand Up @@ -123,23 +124,35 @@ pub type Derivations = HashMap<String, Derivation>;
pub type Packages = HashMap<String, Package>;

impl Derivation {
pub fn get_derivations_for_current_system() -> Result<Derivations, Error> {
/// Returns the derivations reachable from the current system.
///
/// This is a thin wrapper that calls [`Derivation::get_derivations`] with
/// `CURRENT_SYSTEM_PATH`, so it succeeds and fails under exactly the same
/// conditions as that function.
pub fn get_derivations_for_current_system() -> Result<Derivations, Box<dyn Error>> {
Derivation::get_derivations(CURRENT_SYSTEM_PATH)
}

pub fn get_derivations(file_path: &str) -> Result<Derivations, Error> {
/// Runs `nix show-derivation -r <file_path>` and parses its JSON output.
///
/// # Errors
///
/// Returns an error when the `nix` process cannot be spawned, when it exits
/// with a non-zero status (the error message then carries the process'
/// stderr), or when its stdout cannot be deserialized into [`Derivations`].
pub fn get_derivations(file_path: &str) -> Result<Derivations, Box<dyn Error>> {
    let output = Command::new("nix")
        .arg("show-derivation")
        .arg("-r")
        .arg(file_path)
        .output()?;

    if !output.status.success() {
        // stderr comes from an external process and is not guaranteed to be
        // valid UTF-8; decode it lossily so that reporting the failure can
        // never panic.
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(format!("Could not get derivations from {}: {}", file_path, stderr).into());
    }

    let flat_derivations: Derivations = serde_json::from_slice(&output.stdout)?;

    Ok(flat_derivations)
}

pub fn build_and_get_derivations(file_path: &str, derivation_ref: &str) -> Result<Derivations, Error> {
/// Serializes this derivation as pretty-printed JSON.
///
/// # Errors
///
/// Returns the serializer's error, rendered as a `String`, when
/// serialization fails.
pub fn to_json(&self) -> Result<String, String> {
    match serde_json::to_string_pretty(self) {
        Ok(json) => Ok(json),
        Err(err) => Err(err.to_string()),
    }
}

pub fn build_and_get_derivations(
file_path: &str,
derivation_ref: &str,
) -> Result<Derivations, Box<dyn Error>> {
let derivation_path = format!("{}#{}", file_path, derivation_ref);
let output = Command::new("nix")
.arg("build")
Expand Down Expand Up @@ -510,10 +523,33 @@ pub struct PackageNode {

pub patches: Vec<Derivation>,

pub children: HashSet<String>,
pub children: BTreeSet<String>,
}

impl PackageNode {
/// Finds the best available name for this package.
///
/// The lookup order is:
/// 1. the package's own name, unless it is the placeholder `"source"`;
/// 2. the name of the first source that reports one;
/// 3. for each URL of the main derivation, in order, the project name taken
///    from the URL as a generic project URL, then as an archive URL.
///
/// Returns `None` when none of these yields a name.
pub fn get_name(&self) -> Option<String> {
    let own_name = &self.package.name;
    if own_name != "source" {
        return Some(own_name.to_string());
    }

    let name_from_sources = self.sources.iter().find_map(|source| source.get_name());
    if let Some(source_name) = name_from_sources {
        return Some(source_name.to_string());
    }

    self.main_derivation.get_urls().into_iter().find_map(|url| {
        crate::utils::get_project_name_from_generic_url(&url)
            .or_else(|| crate::utils::get_project_name_from_archive_url(&url))
    })
}

pub fn get_purl(&self) -> Option<PackageURL> {
let mut response: Option<PackageURL> = None;
let urls = self.main_derivation.get_urls();
Expand All @@ -523,28 +559,24 @@ impl PackageNode {
version = Some(self.package.version.to_string());
}

let mut name: Option<String> = None;
if self.package.name == "source" {
name = match self.sources.get(0) {
Some(source) => source.get_name().cloned(),
None => None,
};
if let Some(n) = &name {
log::debug!("Found package name from source: {}", &n);
} else {
log::debug!(
"Could not find package name anywhere for {}",
&self.to_json().unwrap()
);
name = Some(self.package.name.to_string());
}
let mut name: Option<String> = self.get_name();
if let Some(n) = &name {
log::debug!("Found package name from source: {}", &n);
} else {
log::debug!(
"Could not find package name anywhere for {}",
&self.to_json().unwrap()
);
name = Some(self.package.name.to_string());
}

if let Some(url) = urls.get(0) {
if version.is_none() {
version = crate::utils::get_semver_from_archive_url(url);
}
if version.is_none() {
version = self.main_derivation.env.get("rev").cloned();
}
if url.starts_with("https://crates.io") {}
if url.starts_with("https://bitbucket.org") {}
if url.starts_with("https://registry.npmjs.org") {}
Expand Down Expand Up @@ -638,7 +670,7 @@ impl PackageNode {
}
}

pub type PackageGraph = HashMap<String, PackageNode>;
pub type PackageGraph = BTreeMap<String, PackageNode>;

fn add_visited_children(
package_node: &PackageNode,
Expand Down Expand Up @@ -735,7 +767,7 @@ pub fn get_package_graph(
let mut current_node = PackageNode {
package: package.clone(),
main_derivation: derivation.clone(),
children: HashSet::default(),
children: BTreeSet::default(),
sources: vec![],
patches: vec![],
};
Expand All @@ -750,6 +782,7 @@ pub fn get_package_graph(

while child_derivation_paths.len() != 0 {
let child_derivation_path = child_derivation_paths.pop_last().unwrap();
log::debug!("Visiting {}", &child_derivation_path);
if visited_derivations.contains(&child_derivation_path) {
continue;
}
Expand Down
53 changes: 53 additions & 0 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ lazy_static! {
static ref SEMVER_REGEX: Regex = Regex::new(r"([0-9]+.[0-9]+.[0-9]+)(-[0-9a-zA-Z_]+)?").unwrap();
}

lazy_static! {
// Matches `<project name>-<x>.<y>.<z>` followed by an optional `-suffix`.
// Capture group 1 is the project name, group 2 the three-part version and
// group 3 the optional suffix (including its leading dash).
// NOTE(review): the `.` separators in the version part are not escaped, so
// they match any character rather than only a literal dot — confirm whether
// `\.` was intended (SEMVER_REGEX is written the same way).
static ref PROJECT_NAME_AND_SEMVER_REGEX: Regex =
Regex::new(r"([0-9a-zA-Z_-]+)-([0-9]+.[0-9]+.[0-9]+)(-[0-9a-zA-Z_]+)?").unwrap();
}

lazy_static! {
static ref GIT_PROJECT_URL_REGEX: Regex = Regex::new(r"https?://([0-9a-zA-Z/._-]+)\.git").unwrap();
}
Expand Down Expand Up @@ -77,6 +82,28 @@ pub fn get_git_url_from_generic_url(generic_url: &str) -> Option<String> {
None
}

/// Extracts a project name from a GitHub or GitLab project URL.
///
/// The URL is matched against `GITHUB_PROJECT_REGEX` first and
/// `GITLAB_PROJECT_REGEX` second. The second capture group of the first
/// pattern that provides one is returned.
///
/// Returns `None` when neither pattern yields a project name.
pub fn get_project_name_from_generic_url(generic_url: &str) -> Option<String> {
    // A miss on the GitHub pattern must fall through to the GitLab pattern
    // instead of returning early, otherwise GitLab URLs are never recognized.
    if let Some(groups) = GITHUB_PROJECT_REGEX.captures(generic_url) {
        if let Some(project_name) = groups.get(2) {
            return Some(project_name.as_str().to_string());
        }
    }

    // `get(2)` is used rather than indexing so that a non-participating group
    // results in `None` instead of a panic.
    GITLAB_PROJECT_REGEX
        .captures(generic_url)
        .and_then(|groups| groups.get(2).map(|project_name| project_name.as_str().to_string()))
}

pub fn get_github_url_from_generic_url(generic_url: &str) -> Option<String> {
let captured_groups = match GITHUB_PROJECT_REGEX.captures(generic_url) {
Some(g) => g,
Expand Down Expand Up @@ -200,6 +227,18 @@ pub fn get_semver_from_archive_url(archive_url: &str) -> Option<String> {
return Some(captured_groups[1].to_string());
}

/// Extracts the project name from an archive URL.
///
/// Only the last `/`-separated segment of the URL (the archive file name) is
/// inspected. It must match `PROJECT_NAME_AND_SEMVER_REGEX`, i.e. look like
/// `<name>-<x>.<y>.<z>...`; the `<name>` part is returned.
///
/// Returns `None` when the file name does not match that pattern.
pub fn get_project_name_from_archive_url(archive_url: &str) -> Option<String> {
    // `rsplit` always yields at least one segment (the whole string when there
    // is no `/`), and reading it from the right avoids walking the whole URL.
    let archive_filename = archive_url.rsplit('/').next()?;

    // A successful match always contains the mandatory group 1, so no extra
    // length check is needed before indexing.
    let captured_groups = PROJECT_NAME_AND_SEMVER_REGEX.captures(archive_filename)?;
    Some(captured_groups[1].to_string())
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -283,6 +322,7 @@ mod tests {
"https://bitbucket.org/Doomseeker/doomseeker.git"
);
}

#[test]
pub fn test_get_semver_from_archive() {
let version = crate::utils::get_semver_from_archive_url(
Expand Down Expand Up @@ -320,4 +360,17 @@ mod tests {
assert!(version.is_some());
assert_eq!(version.unwrap(), "3.6.4");
}

#[test]
pub fn test_get_project_name_from_archive() {
    // An archive named `<project>-<major>.<minor>.<patch>.<ext>` yields the
    // project part of the file name.
    let archive_url = "https://download.gnome.org/core/3.28/3.28.2/sources/libgsf-1.14.43.tar.xz";
    let project_name = crate::utils::get_project_name_from_archive_url(archive_url);
    assert_eq!(project_name.as_deref(), Some("libgsf"));

    // TODO I should also be able to extract the name from an archive url with partial
    // semver, for example from this one:
    // http://www.leonerd.org.uk/code/libtermkey/libtermkey-0.22.tar.gz
}
}

0 comments on commit b2d4689

Please sign in to comment.